diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/README.md b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/README.md
new file mode 100644
index 000000000..6d64508b4
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/README.md
@@ -0,0 +1,38 @@
+```
+=================
+Benchmark results
+=================
+ fail n perf sem% std% peak_memory score weight
+bert-fp16 0 1 149.82 0.1% 0.4% 24616 149.818434 0.00
+bert-fp32 0 1 27.22 0.0% 0.1% 31580 27.217099 0.00
+bert-tf32 0 1 116.65 0.1% 0.5% 31582 116.647454 0.00
+bert-tf32-fp16 0 1 150.16 0.1% 0.4% 24616 150.156416 3.00
+bf16 0 1 270.83 0.1% 0.8% 1804 270.832982 0.00
+convnext_large-fp16 0 1 310.32 1.9% 10.0% 27478 310.322641 0.00
+convnext_large-fp32 0 1 42.56 2.2% 11.7% 49598 42.558568 0.00
+convnext_large-tf32 0 1 124.27 4.0% 21.4% 49598 124.274883 0.00
+convnext_large-tf32-fp16 0 1 309.84 1.6% 8.5% 27478 309.838750 3.00
+davit_large 0 1 290.43 0.7% 5.6% 34016 290.434243 1.00
+davit_large-multi 0 1 290.77 0.7% 5.4% 34260 290.773492 5.00
+dlrm 0 1 418230.56 0.1% 0.4% 7120 418230.564140 1.00
+focalnet 0 1 381.31 0.5% 3.5% 25794 381.313064 2.00
+fp16 0 1 252.71 0.1% 0.5% 1804 252.706435 0.00
+fp32 0 1 18.93 0.0% 0.3% 2182 18.931403 0.00
+llama 0 1 453.97 11.5% 71.6% 28442 453.966564 1.00
+reformer 0 1 55.60 0.0% 0.2% 25420 55.599417 1.00
+regnet_y_128gf 0 1 78.11 0.9% 6.5% 31570 78.106848 2.00
+resnet152 0 1 637.53 0.8% 5.9% 35958 637.529996 1.00
+resnet152-multi 0 1 638.26 0.7% 5.6% 35422 638.255791 5.00
+resnet50 0 1 1013.93 1.6% 12.6% 4746 1013.930919 1.00
+rwkv 1 1 NaN NaN NaN 1574 NaN 1.00
+stargan 0 1 38.18 3.0% 22.9% 37442 38.177830 1.00
+super-slomo 0 1 41.83 1.2% 9.6% 33816 41.828308 1.00
+t5 0 1 46.26 0.6% 4.3% 35460 46.264485 2.00
+tf32 0 1 133.34 0.0% 0.4% 2182 133.344138 0.00
+whisper 0 1 214.83 0.1% 0.5% 36740 214.828130 1.00
+
+Scores
+------
+Failure rate: 3.70% (FAIL)
+Score: 245.06
+```
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/badge.svg b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/badge.svg
new file mode 100644
index 000000000..544903e36
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/badge.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-fp16.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-fp16.D0.data
new file mode 100644
index 000000000..53b26543f
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-fp16.D0.data
@@ -0,0 +1,445 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp16", "tag": ["bert-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 105.551, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077901.108502, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712077901.124953}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.47928237915039}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.324040412902832}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.062586784362793}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.039785385131836}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.97, "temperature": 72, "power": 312.497}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.756427764892578}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.622476577758789}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.533597946166992}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.476072311401367}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.44356918334961}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.426374435424805}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.416016578674316}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.406279563903809}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.395854949951172}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.3849458694458}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.377933502197266}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.374731063842773}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.374163627624512}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.373811721801758}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.372345924377441}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 72, "power": 262.986}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.369985580444336}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.367439270019531}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.366805076599121}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.366157531738281}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.36543083190918}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.365005493164062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.364080429077148}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.363504409790039}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.362876892089844}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.362106323242188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.36214828491211}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.361862182617188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 152.87724257824746, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.36141586303711}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.361193656921387}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 73, "power": 346.639}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.361042022705078}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.360857963562012}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 154.74140036061542, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.360292434692383}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.36062240600586}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.360210418701172}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359718322753906}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359888076782227}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.99908125562732, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.36003303527832}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359695434570312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35957145690918}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359235763549805}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359640121459961}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.12722221809477, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35922622680664}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358892440795898}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35894775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 73, "power": 311.874}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359354019165039}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359068870544434}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.01328752070046, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358457565307617}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35875129699707}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358539581298828}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358678817749023}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358731269836426}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.35282621139308, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359058380126953}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358646392822266}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359306335449219}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358448028564453}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358570098876953}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.2284956640488, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358207702636719}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.97, "temperature": 74, "power": 272.101}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358234405517578}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358455657958984}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358585357666016}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.71168850521985, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358243942260742}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358636856079102}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358400344848633}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35833740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.1134768755302, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358665466308594}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358572959899902}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358033180236816}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35838508605957}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.6994876814785, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358282089233398}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358419418334961}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 343.577}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35842227935791}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358055114746094}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.3580322265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.679308993346, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358024597167969}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357969284057617}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358566284179688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358064651489258}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.81116457198465, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358316421508789}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358358383178711}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358470916748047}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358149528503418}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.0723919204146, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358348846435547}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35792350769043}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 314.376}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358392715454102}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358460426330566}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.99273955456914, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358633995056152}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358476638793945}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357904434204102}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358171463012695}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.66613708208294, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358545303344727}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357903480529785}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358219146728516}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358248710632324}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.6258571309043, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 261.322}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357889175415039}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35851001739502}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358098983764648}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.75822825712604, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358307838439941}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358318328857422}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358499526977539}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358034133911133}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357908248901367}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.1353529309938, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358236312866211}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358089447021484}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35861587524414}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358320236206055}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358344078063965}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.50298880675425, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357931137084961}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 283.842}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357934951782227}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358261108398438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358291625976562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358189582824707}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.4349718482305, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358278274536133}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357937812805176}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358482360839844}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357876777648926}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.06292987443084, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358512878417969}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358345031738281}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358097076416016}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358293533325195}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357551574707031}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 330.688}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.64640154660455, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35798454284668}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35827922821045}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358026504516602}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358270645141602}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35817813873291}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.42670376474726, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35809326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358072280883789}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358430862426758}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357963562011719}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358388900756836}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.52963472130756, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35820198059082}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358226776123047}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358162879943848}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 274.318}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.53146036506402, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358267784118652}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358076095581055}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358078002929688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358617782592773}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358043670654297}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 148.7045908133323, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357868194580078}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358116149902344}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35775375366211}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358063697814941}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357721328735352}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.36469133502277, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357690811157227}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357881546020508}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358210563659668}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 77, "power": 247.265}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.29060277984328, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358190536499023}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357914924621582}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357951164245605}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357860565185547}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.399675175413, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357804298400879}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358224868774414}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35790729522705}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.3578519821167}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.32016976490075, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357871055603027}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358051300048828}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358205795288086}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 332.917}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357698440551758}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.36037534792553, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 290.907}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712077943.0068586, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-fp32.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-fp32.D0.data
new file mode 100644
index 000000000..69c6cb768
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-fp32.D0.data
@@ -0,0 +1,228 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp32", "tag": ["bert-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 74, "power": 106.843, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077836.003601, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712077836.0194106}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.479292869567871}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29670.375, 81920.0], "load": 1.0, "temperature": 75, "power": 288.505}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.323901176452637}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.044936180114746}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.83090591430664}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.708606719970703}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 75, "power": 275.217}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.565232276916504}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.491073608398438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.468774795532227}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.4805908203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 76, "power": 311.923}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.254101107707164, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.515676498413086}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.562061309814453}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.278152347016363, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.608406066894531}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 77, "power": 303.085}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.640921592712402}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.326843910432217, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.648000717163086}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.63291072845459}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.341786224266176, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.614981651306152}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 78, "power": 300.483}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.622065544128418}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.297388291665634, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.669747352600098}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.752553939819336}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.274687245700036, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 78, "power": 301.928}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.848938941955566}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.93950080871582}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.246857225515352, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.007094383239746}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.038527488708496}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.26535445752342, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.016993522644043}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 78, "power": 285.573}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.91185188293457}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.215620658582264, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.875959396362305}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.918696403503418}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.226982677684816, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 78, "power": 293.654}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.017876625061035}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.246811866760254}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.19422325556739, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.431685447692871}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.560530662536621}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.212896789111443, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 78, "power": 308.917}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.64004135131836}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.689648628234863}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.18328096857228, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.736746788024902}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.803998947143555}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.230739313874587, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.953}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.90011978149414}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.020580291748047}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.202090688085324, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.152454376220703}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.279325485229492}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.19911093178133, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.385612487792969}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 307.212}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.46435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.15907313311105, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.513260841369629}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.550541877746582}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.186907108381124, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.515}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.602174758911133}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.679553985595703}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.187766579635632, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.78598690032959}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.926142692565918}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.20186679611355, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.089705467224121}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.9}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.263202667236328}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.193978093426953, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.435498237609863}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.600459098815918}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.169159409590325, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.643}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.752622604370117}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.883284568786621}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.188029404963412, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.991811752319336}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.081356048583984}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.19309180509355, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 300.951}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.160579681396484}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.241079330444336}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.224704317420887, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.328137397766113}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.415624618530273}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.180507871153157, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 301.428}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.503317832946777}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.589831352233887}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.182520064371573, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.672152519226074}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.75645637512207}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.187741851956112, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 288.884}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.840280532836914}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.907486915588379}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.19052326449445, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.961742401123047}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.999088287353516}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.17834205619532, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 78, "power": 292.988}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.025399208068848}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.040262222290039}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.222413323563753, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 78, "power": 299.773}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712077898.4381008, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-tf32-fp16.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-tf32-fp16.D0.data
new file mode 100644
index 000000000..d6d1bc587
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-tf32-fp16.D0.data
@@ -0,0 +1,445 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 3.0, "name": "bert-tf32-fp16", "tag": ["bert-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 105.582, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077990.754599, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712077990.7707229}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.47928237915039}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.324040412902832}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.062586784362793}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.039785385131836}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 73, "power": 307.221}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.756427764892578}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.622476577758789}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.533597946166992}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.476072311401367}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.44356918334961}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.426374435424805}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.416016578674316}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.406279563903809}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.395854949951172}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.3849458694458}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.377933502197266}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.374731063842773}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.374163627624512}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.373811721801758}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 73, "power": 309.24}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.372345924377441}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.369985580444336}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.367439270019531}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.366805076599121}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.366157531738281}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.36543083190918}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.365005493164062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.364080429077148}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.363504409790039}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.362876892089844}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.362106323242188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.36214828491211}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.361862182617188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 152.95751583105746, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.36141586303711}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.361193656921387}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 250.925}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.361042022705078}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.360857963562012}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 153.1054651219453, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.360292434692383}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.36062240600586}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.360210418701172}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359718322753906}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359888076782227}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.57620004980626, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.36003303527832}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359695434570312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35957145690918}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359235763549805}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359640121459961}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.71340498561378, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35922622680664}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358892440795898}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 255.027}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35894775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359354019165039}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359068870544434}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.53156811074192, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358457565307617}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35875129699707}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358539581298828}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358678817749023}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358731269836426}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.6244679747852, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359058380126953}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358646392822266}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.359306335449219}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358448028564453}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358570098876953}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.9950361955178, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358207702636719}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 257.893}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358234405517578}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358455657958984}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358585357666016}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.60892610090096, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358243942260742}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358636856079102}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358400344848633}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35833740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.8980747897913, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358665466308594}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358572959899902}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358033180236816}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35838508605957}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.78665044826744, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358282089233398}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358419418334961}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 279.627}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35842227935791}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358055114746094}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.3580322265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.7427044109675, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358024597167969}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357969284057617}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358566284179688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358064651489258}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.79841893592229, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358316421508789}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358358383178711}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358470916748047}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358149528503418}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.5930821337377, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358348846435547}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 318.696}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35792350769043}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358392715454102}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358460426330566}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.58623167486624, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358633995056152}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358476638793945}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357904434204102}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358171463012695}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.6818004880261, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358545303344727}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357903480529785}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358219146728516}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358248710632324}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.66072264942693, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 249.228}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357889175415039}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35851001739502}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358098983764648}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.1716942075641, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358307838439941}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358318328857422}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358499526977539}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358034133911133}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357908248901367}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.71687028624214, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358236312866211}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358089447021484}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35861587524414}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358320236206055}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358344078063965}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 302.024}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.22178843427483, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357931137084961}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357934951782227}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358261108398438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358291625976562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358189582824707}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.38647792765164, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358278274536133}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357937812805176}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358482360839844}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357876777648926}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.32605505247176, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358512878417969}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358345031738281}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358097076416016}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358293533325195}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357551574707031}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 300.199}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.50163474123144, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35798454284668}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35827922821045}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358026504516602}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358270645141602}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35817813873291}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.25221110778236, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35809326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358072280883789}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358430862426758}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357963562011719}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358388900756836}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.13450818014954, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35820198059082}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358226776123047}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 77, "power": 335.337}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358162879943848}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.06195396868884, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358267784118652}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358076095581055}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358078002929688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358617782592773}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358043670654297}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.7410177353864, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357868194580078}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358116149902344}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35775375366211}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358063697814941}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357721328735352}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.3987521174899, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357690811157227}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357881546020508}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358210563659668}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 77, "power": 280.502}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.54771507550964, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358190536499023}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357914924621582}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357951164245605}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357860565185547}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.4572048403527, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357804298400879}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358224868774414}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35790729522705}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.3578519821167}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.62833744853714, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357871055603027}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358051300048828}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358205795288086}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 280.599}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.357698440551758}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.76659636360603, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 295.533}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712078032.751831, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-tf32.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-tf32.D0.data
new file mode 100644
index 000000000..dd763a609
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-tf32.D0.data
@@ -0,0 +1,373 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-tf32", "tag": ["bert-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 104.985, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077945.65208, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712077945.6682127}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.479286193847656}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.323932647705078}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.044816970825195}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.830974578857422}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 73, "power": 256.009}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.708649635314941}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.565240859985352}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.491065979003906}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.468748092651367}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.480533599853516}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.515584945678711}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.561928749084473}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.608244895935059}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.640739440917969}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.647823333740234}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.632768630981445}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 74, "power": 320.503}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.614849090576172}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.621868133544922}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.669413566589355}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.752106666564941}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.848526000976562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.939350128173828}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.007367134094238}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.03938102722168}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.01879596710205}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 118.70462377422068, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.915424346923828}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.861414909362793}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.006791114807129}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 74, "power": 300.431}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.027995109558105}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.58564447113356, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.285082817077637}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.481186866760254}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.615607261657715}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.69710636138916}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.39533726862513, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.744179725646973}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.784246444702148}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.841986656188965}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.928215026855469}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 115.9827451274002, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.041173934936523}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.170112609863281}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 75, "power": 322.444}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.299582481384277}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.414661407470703}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.51154639482996, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.506692886352539}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.572625160217285}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.624845504760742}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.687198638916016}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.41696221151335, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.77263069152832}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.880777359008789}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.014769554138184}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.169602394104004}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.64782046459099, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.335892677307129}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.508112907409668}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 1.0, "temperature": 74, "power": 304.768}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.685002326965332}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.864999771118164}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.46253189897377, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.042545318603516}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.21193790435791}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.368173599243164}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.505072593688965}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 117.37462078541034, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.626041412353516}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.737335205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.839637756347656}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.941758155822754}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.93688403764692, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.044066429138184}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 76, "power": 291.007}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.145570755004883}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.23983097076416}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.333688735961914}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.18791355192967, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.425797462463379}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.513972282409668}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.597168922424316}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.675834655761719}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.15515117232664, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.757519721984863}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.833535194396973}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.900604248046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.956380844116211}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.30099918295276, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.990116119384766}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 76, "power": 310.928}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.019303321838379}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.04137897491455}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.049532890319824}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.28659638781434, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.041813850402832}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.022936820983887}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.001933097839355}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.978167533874512}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.40177699316514, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.95536994934082}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.924501419067383}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.874979019165039}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.806092262268066}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 76, "power": 251.316}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.4368992015761, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.719812393188477}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.609824180603027}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.481300354003906}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.328900337219238}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.25418389498418, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.169635772705078}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.010271072387695}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.85876178741455}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.748523712158203}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 115.68765018092856, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.666166305541992}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.625612258911133}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.650134086608887}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 76, "power": 306.155}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.727069854736328}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.18837317400578, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.84864330291748}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 13.988987922668457}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.1289644241333}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.244298934936523}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.08986301166708, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.312968254089355}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.326717376708984}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.288104057312012}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.236703872680664}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.1493113189736, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.258355140686035}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 14.536263465881348}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.127537727355957}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 76, "power": 309.656}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.74303913116455}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.12458767141415, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.15192413330078}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.16707420349121}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.868474960327148}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.518776893615723}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.23488520032957, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.283734321594238}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.241960525512695}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.301491737365723}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.411982536315918}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.40753523254803, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.539347648620605}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.672064781188965}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 77, "power": 287.576}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.800228118896484}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 15.920104026794434}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.72236355410804, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.03019905090332}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.13632583618164}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.23299217224121}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.31147003173828}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.97151478235925, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.37289810180664}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.41785430908203}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.443866729736328}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.45807647705078}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 117.68319859829897, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.459491729736328}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.45636749267578}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 1.0, "temperature": 76, "power": 321.38}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.44569206237793}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.440540313720703}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 116.6127997525185, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.448402404785156}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.4744815826416}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.53107261657715}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.61451530456543}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 118.09120963152789, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.71721076965332}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.82810401916504}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 16.940759658813477}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 17.044357299804688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 118.18762217616587, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 17.134565353393555}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 77, "power": 270.625}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 17.20937156677246}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 17.272945404052734}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 17.317352294921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 117.97527645220266, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 77, "power": 282.746}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712077987.9582925, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bf16.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bf16.D0.data
new file mode 100644
index 000000000..4c2277b92
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bf16.D0.data
@@ -0,0 +1,110 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "bf16"}, "weight": 0.0, "name": "bf16", "tag": ["bf16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 70, "power": 100.567, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077272.967851, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712077272.9776273}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 187.68587817605857, "units": "Tflops", "t": 1712077274.8174868}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 67, "power": 63.05}}, "t": 1712077274.3348703}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 274.80446071776925, "units": "Tflops", "t": 1712077274.8981118}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 0.13, "temperature": 71, "power": 94.774}}, "t": 1712077274.842277}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.7718821433681, "units": "Tflops", "t": 1712077274.978844}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.75010311196337, "units": "Tflops", "t": 1712077275.0595205}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.6452897749511, "units": "Tflops", "t": 1712077275.1402483}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.6541554759143, "units": "Tflops", "t": 1712077275.2209487}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.62433677353187, "units": "Tflops", "t": 1712077275.3016598}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.77914259343487, "units": "Tflops", "t": 1712077275.3823252}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 301.556}}, "t": 1712077275.351975}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.6888178399463, "units": "Tflops", "t": 1712077275.4630694}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.621919326757, "units": "Tflops", "t": 1712077275.5437965}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 269.35058367709536, "units": "Tflops", "t": 1712077275.6254904}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 262.6760962961053, "units": "Tflops", "t": 1712077275.7092562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 268.5021785805088, "units": "Tflops", "t": 1712077275.791219}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 273.5420242673793, "units": "Tflops", "t": 1712077275.8716674}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 287.341}}, "t": 1712077275.8590279}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 273.6865042612061, "units": "Tflops", "t": 1712077275.952117}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.74848998872665, "units": "Tflops", "t": 1712077276.0327904}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.6275601025904, "units": "Tflops", "t": 1712077276.113502}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 271.0651740076757, "units": "Tflops", "t": 1712077276.1946776}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 270.2906771164719, "units": "Tflops", "t": 1712077276.2760918}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 273.9043240527408, "units": "Tflops", "t": 1712077276.3564458}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 273.264855547138, "units": "Tflops", "t": 1712077276.4369738}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 295.429}}, "t": 1712077276.3671496}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 269.39385284716394, "units": "Tflops", "t": 1712077276.518722}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 266.80972420036323, "units": "Tflops", "t": 1712077276.601202}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 266.12534009044884, "units": "Tflops", "t": 1712077276.6838827}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 270.47456611901737, "units": "Tflops", "t": 1712077276.7652342}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 273.4795717504233, "units": "Tflops", "t": 1712077276.8456998}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.7154251668301, "units": "Tflops", "t": 1712077276.9263902}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 301.252}}, "t": 1712077276.8754086}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.7694620792332, "units": "Tflops", "t": 1712077277.0071268}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.49466102342467, "units": "Tflops", "t": 1712077277.0878875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 271.0237554759334, "units": "Tflops", "t": 1712077277.1690764}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 270.78266440964177, "units": "Tflops", "t": 1712077277.2503355}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.2638056019428, "units": "Tflops", "t": 1712077277.3311536}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.3498279052019, "units": "Tflops", "t": 1712077277.411946}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 303.218}}, "t": 1712077277.3833127}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 269.4387101132507, "units": "Tflops", "t": 1712077277.4936788}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 266.6238462141339, "units": "Tflops", "t": 1712077277.576205}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 266.5799212360733, "units": "Tflops", "t": 1712077277.658751}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 270.0880550770809, "units": "Tflops", "t": 1712077277.740217}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.7355856897148, "units": "Tflops", "t": 1712077277.8209062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.7718821433681, "units": "Tflops", "t": 1712077277.9015794}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 302.64}}, "t": 1712077277.8922937}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.8017331322509, "units": "Tflops", "t": 1712077277.9823017}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.7533294156807, "units": "Tflops", "t": 1712077278.0629835}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.15615334478537, "units": "Tflops", "t": 1712077278.1438406}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 269.5142929693233, "units": "Tflops", "t": 1712077278.2254796}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 271.9034251685443, "units": "Tflops", "t": 1712077278.3064091}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 271.34108922901333, "units": "Tflops", "t": 1712077278.3874996}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 269.6293232163255, "units": "Tflops", "t": 1712077278.4691033}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 302.713}}, "t": 1712077278.3993769}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 266.44438452341444, "units": "Tflops", "t": 1712077278.5517325}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 267.09018778827016, "units": "Tflops", "t": 1712077278.6341162}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 269.70501306669325, "units": "Tflops", "t": 1712077278.7156987}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.85499706698704, "units": "Tflops", "t": 1712077278.7963476}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.8824441817637, "units": "Tflops", "t": 1712077278.8769922}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.792051014604, "units": "Tflops", "t": 1712077278.9576643}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 304.585}}, "t": 1712077278.90737}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.8283624999564, "units": "Tflops", "t": 1712077279.038381}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 271.8986158538173, "units": "Tflops", "t": 1712077279.119315}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 271.47126714411195, "units": "Tflops", "t": 1712077279.2003756}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 270.33266420236396, "units": "Tflops", "t": 1712077279.2817771}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 269.6088311922987, "units": "Tflops", "t": 1712077279.3633924}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 269.35923639911266, "units": "Tflops", "t": 1712077279.44508}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.085}}, "t": 1712077279.4144542}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 266.33666960785365, "units": "Tflops", "t": 1712077279.5277438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 267.522465320497, "units": "Tflops", "t": 1712077279.609996}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 270.0018745932359, "units": "Tflops", "t": 1712077279.6914887}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 273.81243986506684, "units": "Tflops", "t": 1712077279.7718554}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.2613945448012, "units": "Tflops", "t": 1712077279.8526797}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 270.80492546352593, "units": "Tflops", "t": 1712077279.933939}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.276}}, "t": 1712077279.9263098}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.2822917921485, "units": "Tflops", "t": 1712077280.0148227}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 271.6975784999875, "units": "Tflops", "t": 1712077280.0958183}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 271.6231660169563, "units": "Tflops", "t": 1712077280.1768546}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 269.4851570551768, "units": "Tflops", "t": 1712077280.2585075}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 269.55761475696886, "units": "Tflops", "t": 1712077280.3401432}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 268.7995301182861, "units": "Tflops", "t": 1712077280.4220085}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 266.2113683472868, "units": "Tflops", "t": 1712077280.5046582}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.878}}, "t": 1712077280.434266}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 267.69716689706064, "units": "Tflops", "t": 1712077280.5869045}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 270.3588136951683, "units": "Tflops", "t": 1712077280.6683087}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.8025400064117, "units": "Tflops", "t": 1712077280.7489872}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.6904302574172, "units": "Tflops", "t": 1712077280.8296874}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.76865540073095, "units": "Tflops", "t": 1712077280.9103656}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.86306917463287, "units": "Tflops", "t": 1712077280.991007}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 305.73}}, "t": 1712077280.945368}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 271.8633527435714, "units": "Tflops", "t": 1712077281.0720165}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 269.5591903571311, "units": "Tflops", "t": 1712077281.153654}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 269.537921308937, "units": "Tflops", "t": 1712077281.2352865}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 269.5544636119009, "units": "Tflops", "t": 1712077281.3169227}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 268.46857233499077, "units": "Tflops", "t": 1712077281.3988976}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 267.2565807968629, "units": "Tflops", "t": 1712077281.4812365}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.944}}, "t": 1712077281.452501}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 270.03823763035194, "units": "Tflops", "t": 1712077281.5627854}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 269.53870899160927, "units": "Tflops", "t": 1712077281.6444361}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 270.01610244139914, "units": "Tflops", "t": 1712077281.7259264}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 274.35161388433784, "units": "Tflops", "t": 1712077281.8061397}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 271.75601686671956, "units": "Tflops", "t": 1712077281.887109}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 270.07065641987884, "units": "Tflops", "t": 1712077281.96858}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 75, "power": 312.538}}, "t": 1712077281.9635062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 272.2316616841135, "units": "Tflops", "t": 1712077282.049456}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712077282.91829, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-fp16.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-fp16.D0.data
new file mode 100644
index 000000000..a7ec90fd2
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-fp16.D0.data
@@ -0,0 +1,296 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp16", "tag": ["convnext_large-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 73, "power": 105.941, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077574.944117, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077574.9599888}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 68, "power": 62.02}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 67, "power": 61.046}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.214508056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [26890.375, 81920.0], "load": 0.1, "temperature": 70, "power": 323.022}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.33624267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.254852294921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.179443359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.268218994140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.201690673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.164154052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.13348388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.16265869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 72, "power": 274.639}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0748291015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.077056884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.142242431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1138916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.11871337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0850830078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 73, "power": 291.334}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 334.5882314734153, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.074462890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9622802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.078765869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 327.53991730891426, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.048004150390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0762939453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 345.00244259034025, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.083831787109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.153045654296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.962799072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 71, "power": 263.132}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 316.8212055878198, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.094970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.002777099609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.088836669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 318.2203070560282, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.033294677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938385009765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.112030029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 316.546684591914, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0, "temperature": 69, "power": 95.548}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 297.4660125327079, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84075927734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938690185546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 224.81359756193774, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.798553466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.874664306640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 316.7061906920338, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.944305419921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 74, "power": 277.726}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.958221435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.954071044921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 313.2537290564145, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94793701171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.856536865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 316.2556021920928, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.912078857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.985076904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 314.2589462979041, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 74, "power": 302.244}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.996490478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.023529052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 312.78896120316256, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.968353271484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.947784423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.955780029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 312.462740954117, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0440673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.931915283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 1.0, "temperature": 75, "power": 297.189}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 313.1576098282291, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.907257080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.994720458984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.921539306640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 314.2865708101574, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.981842041015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.964111328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.957977294921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 317.2197622876041, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.98, "temperature": 73, "power": 95.84}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.791717529296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 345.76179508286754, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.962738037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.77764892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 183.8068383929825, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83221435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86846923828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85845947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 304.35881918841017, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.921295166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.91, "temperature": 74, "power": 350.705}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.807586669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.933868408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 316.2907130507503, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.926849365234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 314.5222026874448, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90093994140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88641357421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.918609619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 75, "power": 249.855}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 317.49784052892926, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.898773193359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9808349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 314.82937612894307, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92547607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.915435791015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 314.835593812176, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.900665283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.936798095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 75, "power": 298.34}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.848236083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 316.9509854020993, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9835205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.911590576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99957275390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 313.959630207359, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94586181640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.957916259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 312.41006871422036, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.958465576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 76, "power": 272.186}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.973114013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.763885498046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 344.2054064981413, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.794891357421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 142.31367694418424, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.780670166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.772857666015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 334.30059699757544, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 1.0, "temperature": 76, "power": 308.271}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077625.986699, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-fp32.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-fp32.D0.data
new file mode 100644
index 000000000..b69ebf3ba
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-fp32.D0.data
@@ -0,0 +1,171 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp32", "tag": ["convnext_large-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 69, "power": 99.212, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077488.432695, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077488.4483407}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 65, "power": 59.57}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 64, "power": 59.473}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.214483737945557}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2132.375, 81920.0], "load": 0, "temperature": 64, "power": 88.867}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.33617639541626}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 70, "power": 279.627}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.254825592041016}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.179396152496338}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 278.931}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.268494129180908}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 64.06826146510974, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 72, "power": 306.34}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.2018351554870605}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 32.05170496909889, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.164165019989014}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 282.359}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 42.183949280901075, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.133245468139648}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 74, "power": 294.444}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 42.144510718717946, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.162621021270752}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 42.01739498552834, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.066978931427002}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 74, "power": 263.852}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.977289260689965, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.074810981750488}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 75, "power": 290.746}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 42.01431417771282, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0772786140441895}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.870716543509964, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.043660640716553}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 75, "power": 320.1}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.74912338696986, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.142033576965332}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 76, "power": 298.518}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.77543474972034, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.11406946182251}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.78812267537285, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.118796348571777}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 76, "power": 281.462}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.675553964983436, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.084875583648682}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 77, "power": 305.24}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.6483561268729, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.017924785614014}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.663626523918744, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0746049880981445}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 77, "power": 294.346}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.656239792543204, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.962323188781738}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 78, "power": 307.075}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.64219096737577, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.078864097595215}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.59848530035381, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.047895908355713}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 77, "power": 274.15}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.56146824537856, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.076254367828369}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 78, "power": 306.121}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.57462530473498, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.083705902099609}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.54465021830659, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.153024196624756}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 78, "power": 322.336}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.525081251915616, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.962716102600098}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 78, "power": 303.674}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.56913394948169, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.094931125640869}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.56819683146016, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.002690315246582}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 78, "power": 316.781}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.48043690998095, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.089021682739258}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 79, "power": 311.647}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.444693488809506, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.033406734466553}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.50061485770911, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93844747543335}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 79, "power": 286.178}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.475665475643034, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.112069606781006}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 79, "power": 298.131}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.428615750254686, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.875514507293701}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 54.17718731562482, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.840810298919678}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 79, "power": 283.599}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 63.89109500902222, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938738822937012}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 78, "power": 282.671}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 31.493411880743533, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 79, "power": 275.417}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077572.257265, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-tf32-fp16.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-tf32-fp16.D0.data
new file mode 100644
index 000000000..3340f63d2
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-tf32-fp16.D0.data
@@ -0,0 +1,295 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 3.0, "name": "convnext_large-tf32-fp16", "tag": ["convnext_large-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 72, "power": 103.999, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077687.954617, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077687.97126}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 67, "power": 61.242}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 66, "power": 60.338}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.214508056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2206.375, 81920.0], "load": 0, "temperature": 66, "power": 90.804}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.33624267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.254852294921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.179443359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.268218994140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.201690673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.164154052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.13348388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 71, "power": 307.676}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.16265869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0748291015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.077056884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.142242431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1138916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.11871337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 72, "power": 282.728}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0850830078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 331.4929569504289, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.074462890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9622802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.078765869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 319.82372249235414, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.048004150390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0762939453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.083831787109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 346.9176883316549, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 72, "power": 315.776}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.153045654296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.962799072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 313.33944869938676, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.094970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.002777099609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.088836669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 316.31352898474483, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.033294677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938385009765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.112030029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 319.76039580787824, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.95, "temperature": 72, "power": 316.931}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 311.41370066668395, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84075927734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938690185546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 225.86195109685386, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.798553466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.874664306640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 73, "power": 300.075}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 314.7426827467468, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.944305419921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.958221435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.954071044921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 310.1130353857362, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94793701171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.856536865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 316.289810350307, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.912078857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 73, "power": 346.245}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.985076904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 317.47967621401995, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.996490478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.023529052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 313.2666860525447, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.968353271484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.947784423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.955780029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 315.01726300753967, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0440673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 74, "power": 278.409}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.931915283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 316.4962227634663, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.907257080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.994720458984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.921539306640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 316.6834852262511, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.981842041015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.964111328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.957977294921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 315.4366982044768, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 75, "power": 296.984}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.791717529296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 346.27779378253, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.962738037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.77764892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 203.5596768537521, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83221435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86846923828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85845947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 321.7080049011913, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 1.0, "temperature": 74, "power": 144.08}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.921295166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.807586669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.933868408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 345.79774450935724, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.926849365234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 301.8809506292455, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90093994140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88641357421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 75, "power": 352.88}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 313.2189940989949, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.918609619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.898773193359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 312.8209222407698, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9808349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92547607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 313.41093955709977, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.915435791015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.900665283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 1.0, "temperature": 75, "power": 300.282}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.936798095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 315.3416509374621, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.848236083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9835205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.911590576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 314.2316568080467, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99957275390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94586181640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 315.61995332478983, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.957916259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 75, "power": 291.612}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.958465576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.973114013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 313.95434470196216, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.763885498046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 275.4416541767326, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.794891357421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.780670166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.772857666015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 222.08786703109882, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 75, "power": 301.761}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 75, "power": 301.761}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077739.0546155, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-tf32.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-tf32.D0.data
new file mode 100644
index 000000000..1223ea551
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-tf32.D0.data
@@ -0,0 +1,200 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-tf32", "tag": ["convnext_large-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 71, "power": 102.443, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077628.706463, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077628.7231634}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 66, "power": 60.753}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 65, "power": 59.75}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.2144575119018555}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5832.375, 81920.0], "load": 0.02, "temperature": 65, "power": 91.0}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.336220741271973}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.2547926902771}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.179419040679932}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.268465518951416}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 291.293}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.2018303871154785}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1641645431518555}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.133294105529785}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 330.126}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.162642955780029}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 88.17739619499449, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.066996097564697}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.074804782867432}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 152.33050351039606, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.077259063720703}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 306.133}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 97.8132574719792, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0435991287231445}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.142058372497559}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 151.57353773877497, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.114046573638916}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 98.0459629547178, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1187896728515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 73, "power": 336.314}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.084855556488037}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 151.16311466571597, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.017906188964844}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 97.4397259817078, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.074584484100342}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.962344169616699}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 74, "power": 304.11}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.89702564161328, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.078824996948242}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 97.32629468860473, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.047876834869385}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0762128829956055}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.83456277495438, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 74, "power": 309.786}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.083765983581543}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 97.03157142999105, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.153017044067383}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.962660312652588}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.29697822799912, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.094902038574219}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 75, "power": 292.02}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 97.17432371590144, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.002725601196289}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.089033126831055}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.7771253290328, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.033377170562744}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 96.80165999479628, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.97, "temperature": 75, "power": 287.422}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938430309295654}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.112060070037842}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.32260128648502, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.875522613525391}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 146.5297707551696, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.840785503387451}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.97, "temperature": 75, "power": 264.36}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.8519481533466, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938703536987305}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9308247566223145}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.05294691499935, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.798532009124756}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 97.068528836604, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.888091087341309}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 76, "power": 281.368}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.874434947967529}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 150.4556437799436, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.944365501403809}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 96.5590262374041, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.958274841308594}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.953979015350342}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 76, "power": 294.352}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 148.47920470380544, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.947924613952637}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 96.66047118859383, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85654878616333}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.895290374755859}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.97, "temperature": 76, "power": 352.312}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.88333581726906, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.912010192871094}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 96.42080786133188, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.968919277191162}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.984976768493652}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.5718912138979, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.878252029418945}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 76, "power": 309.247}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 96.62590463625313, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.996346950531006}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.023502349853516}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.34277192097105, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.968320369720459}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.97, "temperature": 77, "power": 307.577}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 96.53451214884157, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.947745323181152}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.955850124359131}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 149.43709916737896, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.97, "temperature": 77, "power": 286.517}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077685.256696, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0.data
new file mode 100644
index 000000000..714866f5d
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0.data
@@ -0,0 +1,269 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "multigpu", "transformer", "vision"], "weight": 5.0, "name": "davit_large-multi", "tag": ["davit_large-multi", "0"], "job-number": 0, "devices": ["0"]}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.53, "memory": 0.010771942138671876}, "temperature": 73, "power": 106.843, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078655.279481, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712078655.2956676}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2326.375, 81920.0], "load": 0, "temperature": 68, "power": 93.377}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.224214553833008}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.224 (7.22) Time: 3.226s, 39.67/s (3.226s, 39.67/s) LR: 1.000e-05 Data: 0.693 (0.693)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 7.176412582397461}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 1.0, "temperature": 72, "power": 281.211}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.255987167358398}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.163339138031006}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.99, "temperature": 73, "power": 273.253}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.234711647033691}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.243332386016846}, "pipe": "data"}
+{"event": "data", "data": {"rate": 294.8439726928596, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.048550128936768}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 1.0, "temperature": 74, "power": 340.826}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 276.51176852556966, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.24672794342041}, "pipe": "data"}
+{"event": "data", "data": {"rate": 307.9046713698683, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.151210784912109}, "pipe": "data"}
+{"event": "data", "data": {"rate": 275.5703406410989, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.96, "temperature": 75, "power": 299.033}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.270848274230957}, "pipe": "data"}
+{"event": "data", "data": {"rate": 307.75355878822086, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 276.34615238221676, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.312984943389893}, "pipe": "data"}
+{"event": "data", "data": {"rate": 307.94670127782297, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.96, "temperature": 75, "power": 300.091}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.311421871185303}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.311 (7.24) Time: 0.417s, 307.07/s (0.509s, 251.35/s) LR: 1.000e-05 Data: 0.001 (0.029)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.718 (0.718) Loss: 7.1176 (7.1176) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.333 (0.179) Loss: 7.0505 (7.2336) Acc@1: 0.0000 ( 0.0969) Acc@5: 0.0000 ( 0.5329)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0/20240402-172421-davit_large-224/checkpoint-0.pth.tar', 0.09689922480620156)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 307.7163250994223, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32722.375, 81920.0], "load": 0.93, "temperature": 75, "power": 237.218}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32722.375, 81920.0], "load": 0.97, "temperature": 76, "power": 306.856}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.413616180419922}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30446.375, 81920.0], "load": 0, "temperature": 70, "power": 343.453}}}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.414 (7.41) Time: 0.950s, 134.69/s (0.950s, 134.69/s) LR: 1.008e-03 Data: 0.535 (0.535)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 274.2118566931733, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.087185859680176}, "pipe": "data"}
+{"event": "data", "data": {"rate": 306.28555946930277, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0225982666015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.99, "temperature": 75, "power": 252.528}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 276.6412672521491, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.015655040740967}, "pipe": "data"}
+{"event": "data", "data": {"rate": 306.75531914562185, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.004358291625977}, "pipe": "data"}
+{"event": "data", "data": {"rate": 276.36553728745525, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.97, "temperature": 75, "power": 325.319}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06734561920166}, "pipe": "data"}
+{"event": "data", "data": {"rate": 307.4019411883425, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 273.8358524441842, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.943554401397705}, "pipe": "data"}
+{"event": "data", "data": {"rate": 303.7898200130995, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.032367706298828}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.96, "temperature": 76, "power": 252.35}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 272.8785524880894, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.978535175323486}, "pipe": "data"}
+{"event": "data", "data": {"rate": 304.6717255581839, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.981788635253906}, "pipe": "data"}
+{"event": "data", "data": {"rate": 274.5677901320675, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.96, "temperature": 76, "power": 300.67}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.017374038696289}, "pipe": "data"}
+{"event": "data", "data": {"rate": 305.15768337875835, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 6.997 (7.05) Time: 0.415s, 308.21/s (0.439s, 291.68/s) LR: 1.008e-03 Data: 0.000 (0.024)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.603 (0.603) Loss: 6.8701 (6.8701) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.168) Loss: 6.7062 (6.8681) Acc@1: 0.0000 ( 0.2665) Acc@5: 3.1250 ( 1.3081)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0/20240402-172421-davit_large-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 307.4648709014338, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.95, "temperature": 76, "power": 301.646}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.97, "temperature": 76, "power": 297.301}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.03, "temperature": 71, "power": 99.384}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.885659694671631}, "pipe": "data"}
+{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.886 (6.89) Time: 0.930s, 137.70/s (0.930s, 137.70/s) LR: 2.006e-03 Data: 0.516 (0.516)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 6.915807247161865}, "pipe": "data"}
+{"event": "data", "data": {"rate": 306.7073308617536, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.96, "temperature": 75, "power": 303.977}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 273.830104999405, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9318437576293945}, "pipe": "data"}
+{"event": "data", "data": {"rate": 304.7788103103591, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9413652420043945}, "pipe": "data"}
+{"event": "data", "data": {"rate": 272.8196715959206, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.995604515075684}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.96, "temperature": 76, "power": 318.855}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 306.1788308935454, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.912203788757324}, "pipe": "data"}
+{"event": "data", "data": {"rate": 274.54100221769824, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.013670921325684}, "pipe": "data"}
+{"event": "data", "data": {"rate": 305.43472947405, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.95, "temperature": 76, "power": 307.78}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 272.5777511504807, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.896271228790283}, "pipe": "data"}
+{"event": "data", "data": {"rate": 303.8248546557082, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.970767974853516}, "pipe": "data"}
+{"event": "data", "data": {"rate": 273.0200383387749, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.99, "temperature": 77, "power": 312.726}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.092811584472656}, "pipe": "data"}
+{"event": "data", "data": {"rate": 304.67624334344293, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.033717155456543}, "pipe": "data"}
+{"event": "data", "data": {"rate": 273.75036308948916, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 6.948 (6.96) Time: 0.415s, 308.07/s (0.439s, 291.73/s) LR: 2.006e-03 Data: 0.000 (0.023)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.570 (0.570) Loss: 6.7530 (6.7530) Acc@1: 0.7812 ( 0.7812) Acc@5: 2.3438 ( 2.3438)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.167) Loss: 6.4966 (6.8226) Acc@1: 3.1250 ( 0.2422) Acc@5: 6.2500 ( 1.0417)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 307.36380818917365, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.94, "temperature": 76, "power": 303.77}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.94, "temperature": 77, "power": 283.326}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.03, "temperature": 72, "power": 100.261}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86574649810791}, "pipe": "data"}
+{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 6.866 (6.87) Time: 0.954s, 134.22/s (0.954s, 134.22/s) LR: 3.004e-03 Data: 0.539 (0.539)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 276.79999920074, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.860692024230957}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.97, "temperature": 76, "power": 314.448}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 273.0181789293082, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.905224323272705}, "pipe": "data"}
+{"event": "data", "data": {"rate": 305.36922695818373, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.900017738342285}, "pipe": "data"}
+{"event": "data", "data": {"rate": 272.7294380982873, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.96, "temperature": 76, "power": 284.911}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.911055088043213}, "pipe": "data"}
+{"event": "data", "data": {"rate": 304.01918877913823, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 273.1311786486939, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.976555824279785}, "pipe": "data"}
+{"event": "data", "data": {"rate": 305.0030525205892, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.897703170776367}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.96, "temperature": 77, "power": 301.18}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 274.03801670530396, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.003688812255859}, "pipe": "data"}
+{"event": "data", "data": {"rate": 303.24990985699, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.037755966186523}, "pipe": "data"}
+{"event": "data", "data": {"rate": 272.1906413385024, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.96, "temperature": 77, "power": 311.303}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.975908279418945}, "pipe": "data"}
+{"event": "data", "data": {"rate": 305.08575910635346, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 272.97039532206026, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.015989303588867}, "pipe": "data"}
+{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 6.998 (6.96) Time: 0.418s, 306.53/s (0.440s, 290.99/s) LR: 3.004e-03 Data: 0.000 (0.024)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.595 (0.595) Loss: 6.8181 (6.8181) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.167) Loss: 6.3763 (6.8167) Acc@1: 0.0000 ( 0.1938) Acc@5: 9.3750 ( 0.9932)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 307.5569831126179, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0, "temperature": 73, "power": 101.877}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.94, "temperature": 77, "power": 214.339}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.03, "temperature": 72, "power": 100.928}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8576154708862305}, "pipe": "data"}
+{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 6.858 (6.86) Time: 0.942s, 135.91/s (0.942s, 135.91/s) LR: 4.002e-03 Data: 0.527 (0.527)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33942.375, 81920.0], "load": 0, "temperature": 74, "power": 297.878}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.830388069152832}, "pipe": "data"}
+{"event": "data", "data": {"rate": 286.5000055845118, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 263.89076398509974, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.881205081939697}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.99, "temperature": 76, "power": 310.986}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 305.82670597304406, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.920263290405273}, "pipe": "data"}
+{"event": "data", "data": {"rate": 274.0574188767639, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.918469429016113}, "pipe": "data"}
+{"event": "data", "data": {"rate": 303.6952405480592, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.96, "temperature": 76, "power": 271.907}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.993577003479004}, "pipe": "data"}
+{"event": "data", "data": {"rate": 274.3115977402665, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.038257598876953}, "pipe": "data"}
+{"event": "data", "data": {"rate": 304.79523726581255, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 273.60850811477786, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0744194984436035}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.96, "temperature": 76, "power": 311.459}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 305.12726899709185, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.013601303100586}, "pipe": "data"}
+{"event": "data", "data": {"rate": 274.09323426975715, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.049444675445557}, "pipe": "data"}
+{"event": "data", "data": {"rate": 306.12720537247503, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.96, "temperature": 77, "power": 290.782}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.013031482696533}, "pipe": "data"}
+{"event": "data", "data": {"rate": 273.40695289730274, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "line", "data": "Train: 4 [ 31/32 (100%)] Loss: 7.093 (6.97) Time: 0.417s, 307.27/s (0.439s, 291.56/s) LR: 4.002e-03 Data: 0.000 (0.023)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.589 (0.589) Loss: 6.8217 (6.8217) Acc@1: 0.7812 ( 0.7812) Acc@5: 2.3438 ( 2.3438)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.167) Loss: 6.7427 (6.8290) Acc@1: 3.1250 ( 0.3634) Acc@5: 3.1250 ( 1.1628)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0/20240402-172421-davit_large-224/checkpoint-4.pth.tar', 0.3633720930232558)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 306.9712804876606, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34260.375, 81920.0], "load": 0.97, "temperature": 77, "power": 287.721}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34260.375, 81920.0], "load": 0.93, "temperature": 77, "power": 291.642}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34260.375, 81920.0], "load": 0.03, "temperature": 72, "power": 99.871}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.820207595825195}, "pipe": "data"}
+{"event": "line", "data": "Train: 5 [ 0/32 ( 0%)] Loss: 6.820 (6.82) Time: 0.953s, 134.34/s (0.953s, 134.34/s) LR: 4.997e-03 Data: 0.539 (0.539)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 6.873222351074219}, "pipe": "data"}
+{"event": "data", "data": {"rate": 305.7752899390563, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "stop", "data": null, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712078776.5063932, "return_code": -15}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/davit_large.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/davit_large.D0.data
new file mode 100644
index 000000000..1ee42fced
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/davit_large.D0.data
@@ -0,0 +1,255 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "transformer", "vision"], "weight": 1.0, "name": "davit_large", "tag": ["davit_large", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.23, "memory": 0.010771942138671876}, "temperature": 70, "power": 101.456, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078543.67927, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712078543.6959698}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2250.375, 81920.0], "load": 0, "temperature": 66, "power": 90.804}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.2242937088012695}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.224 (7.22) Time: 3.254s, 39.33/s (3.254s, 39.33/s) LR: 1.000e-05 Data: 0.707 (0.707)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 7.176398277282715}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 1.0, "temperature": 70, "power": 294.838}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.255929470062256}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.163320541381836}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.99, "temperature": 71, "power": 275.042}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.234607696533203}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.243466377258301}, "pipe": "data"}
+{"event": "data", "data": {"rate": 300.5270940005941, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.048627853393555}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.98, "temperature": 71, "power": 278.944}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 278.30462812791, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.246768951416016}, "pipe": "data"}
+{"event": "data", "data": {"rate": 310.2403670135786, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.151220321655273}, "pipe": "data"}
+{"event": "data", "data": {"rate": 278.9808910436719, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.96, "temperature": 73, "power": 275.41}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.270956993103027}, "pipe": "data"}
+{"event": "data", "data": {"rate": 309.4637261666249, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 276.2228758698495, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.3129563331604}, "pipe": "data"}
+{"event": "data", "data": {"rate": 307.27961934508033, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.95, "temperature": 73, "power": 298.615}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.311375617980957}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.311 (7.24) Time: 0.416s, 307.55/s (0.509s, 251.57/s) LR: 1.000e-05 Data: 0.001 (0.029)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.723 (0.723) Loss: 7.1174 (7.1174) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.339 (0.179) Loss: 7.0508 (7.2335) Acc@1: 0.0000 ( 0.0969) Acc@5: 0.0000 ( 0.5329)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large.D0/20240402-172229-davit_large-224/checkpoint-0.pth.tar', 0.09689922480620156)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 308.49873079042874, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32722.375, 81920.0], "load": 0.94, "temperature": 74, "power": 306.482}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32722.375, 81920.0], "load": 0.97, "temperature": 74, "power": 296.396}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.413697719573975}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31890.375, 81920.0], "load": 0, "temperature": 69, "power": 313.697}}}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.414 (7.41) Time: 0.953s, 134.28/s (0.953s, 134.28/s) LR: 1.008e-03 Data: 0.539 (0.539)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 7.087247848510742}, "pipe": "data"}
+{"event": "data", "data": {"rate": 285.3636305029402, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 275.9915872373749, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.022705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.96, "temperature": 74, "power": 337.603}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 308.24407199088466, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.015710353851318}, "pipe": "data"}
+{"event": "data", "data": {"rate": 277.388256161822, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.004858016967773}, "pipe": "data"}
+{"event": "data", "data": {"rate": 306.53774873595506, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.96, "temperature": 74, "power": 281.279}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.067661285400391}, "pipe": "data"}
+{"event": "data", "data": {"rate": 276.0976480936771, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.943252086639404}, "pipe": "data"}
+{"event": "data", "data": {"rate": 307.91200463468505, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 276.4420175081399, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.032105445861816}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.95, "temperature": 75, "power": 295.13}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 307.00961842346175, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.978611946105957}, "pipe": "data"}
+{"event": "data", "data": {"rate": 275.1157522660887, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.981616973876953}, "pipe": "data"}
+{"event": "data", "data": {"rate": 307.53203742029007, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.98, "temperature": 75, "power": 289.09}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01680326461792}, "pipe": "data"}
+{"event": "data", "data": {"rate": 275.15100375757925, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 6.997 (7.05) Time: 0.415s, 308.57/s (0.438s, 292.34/s) LR: 1.008e-03 Data: 0.000 (0.024)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.568 (0.568) Loss: 6.8692 (6.8692) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.166) Loss: 6.7062 (6.8682) Acc@1: 0.0000 ( 0.2907) Acc@5: 3.1250 ( 1.2839)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large.D0/20240402-172229-davit_large-224/checkpoint-1.pth.tar', 0.29069767441860467)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 308.3962937718861, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.96, "temperature": 75, "power": 325.384}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.97, "temperature": 75, "power": 309.057}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.01, "temperature": 70, "power": 96.438}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.885842323303223}, "pipe": "data"}
+{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.886 (6.89) Time: 0.929s, 137.86/s (0.929s, 137.86/s) LR: 2.006e-03 Data: 0.515 (0.515)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 224.5434368823345, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.914949417114258}, "pipe": "data"}
+{"event": "data", "data": {"rate": 304.4823578176238, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.99, "temperature": 75, "power": 288.392}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.931748867034912}, "pipe": "data"}
+{"event": "data", "data": {"rate": 274.70201361061345, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.940550327301025}, "pipe": "data"}
+{"event": "data", "data": {"rate": 307.3401215247171, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 275.03594899808564, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.992883205413818}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.98, "temperature": 74, "power": 290.365}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 304.6667464560313, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91033935546875}, "pipe": "data"}
+{"event": "data", "data": {"rate": 274.31696800164144, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.011209487915039}, "pipe": "data"}
+{"event": "data", "data": {"rate": 305.79546016808393, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 1.0, "temperature": 75, "power": 261.956}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89418888092041}, "pipe": "data"}
+{"event": "data", "data": {"rate": 273.24362302591646, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.969819068908691}, "pipe": "data"}
+{"event": "data", "data": {"rate": 303.51021490541086, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 272.88291211638676, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.095127105712891}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.99, "temperature": 76, "power": 202.157}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 305.5853286828446, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.027336120605469}, "pipe": "data"}
+{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 6.948 (6.96) Time: 0.417s, 307.15/s (0.439s, 291.84/s) LR: 2.006e-03 Data: 0.000 (0.023)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 276.3652362514106, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.578 (0.578) Loss: 6.7616 (6.7616) Acc@1: 0.7812 ( 0.7812) Acc@5: 2.3438 ( 2.3438)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.167) Loss: 6.4838 (6.8220) Acc@1: 3.1250 ( 0.2665) Acc@5: 6.2500 ( 1.0174)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 307.1623750663534, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.96, "temperature": 76, "power": 303.368}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.96, "temperature": 76, "power": 319.212}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.03, "temperature": 71, "power": 99.384}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.863471984863281}, "pipe": "data"}
+{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 6.863 (6.86) Time: 0.943s, 135.77/s (0.943s, 135.77/s) LR: 3.004e-03 Data: 0.529 (0.529)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 248.52663798014294, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.857139587402344}, "pipe": "data"}
+{"event": "data", "data": {"rate": 272.7562799120878, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.99, "temperature": 75, "power": 270.234}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.905974388122559}, "pipe": "data"}
+{"event": "data", "data": {"rate": 306.0472700194181, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 272.6535054306071, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89678955078125}, "pipe": "data"}
+{"event": "data", "data": {"rate": 305.19799903385604, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.914050102233887}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 1.0, "temperature": 76, "power": 337.009}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 273.5052933049932, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9702558517456055}, "pipe": "data"}
+{"event": "data", "data": {"rate": 302.962982226593, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.908979415893555}, "pipe": "data"}
+{"event": "data", "data": {"rate": 273.30194071942975, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.96, "temperature": 76, "power": 196.336}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.006004810333252}, "pipe": "data"}
+{"event": "data", "data": {"rate": 303.67930274953716, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 272.00698230023255, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.034702301025391}, "pipe": "data"}
+{"event": "data", "data": {"rate": 303.4957832212892, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.99, "temperature": 77, "power": 329.645}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.979143142700195}, "pipe": "data"}
+{"event": "data", "data": {"rate": 272.89150292322813, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01246976852417}, "pipe": "data"}
+{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 6.999 (6.96) Time: 0.418s, 306.55/s (0.440s, 291.14/s) LR: 3.004e-03 Data: 0.000 (0.023)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 306.7599739224944, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.583 (0.583) Loss: 6.8222 (6.8222) Acc@1: 0.0000 ( 0.0000) Acc@5: 1.5625 ( 1.5625)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.167) Loss: 6.3795 (6.8159) Acc@1: 0.0000 ( 0.1696) Acc@5: 6.2500 ( 1.0174)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 306.5910363746384, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.69, "temperature": 76, "power": 325.042}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.94, "temperature": 77, "power": 232.118}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.03, "temperature": 72, "power": 100.469}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.859582901000977}, "pipe": "data"}
+{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 6.860 (6.86) Time: 0.931s, 137.45/s (0.931s, 137.45/s) LR: 4.002e-03 Data: 0.516 (0.516)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 292.9241407052597, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.827597618103027}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.95, "temperature": 76, "power": 309.68}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 303.44004119255453, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.876450061798096}, "pipe": "data"}
+{"event": "data", "data": {"rate": 272.88169537397914, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.914443016052246}, "pipe": "data"}
+{"event": "data", "data": {"rate": 304.23844681931166, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.96, "temperature": 76, "power": 290.365}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 275.6354645354039, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.918074607849121}, "pipe": "data"}
+{"event": "data", "data": {"rate": 305.6354173339756, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9963908195495605}, "pipe": "data"}
+{"event": "data", "data": {"rate": 273.21656186972285, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.96, "temperature": 76, "power": 308.465}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.041259288787842}, "pipe": "data"}
+{"event": "data", "data": {"rate": 303.067883017303, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.073038101196289}, "pipe": "data"}
+{"event": "data", "data": {"rate": 273.5645263834793, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.011545658111572}, "pipe": "data"}
+{"event": "data", "data": {"rate": 303.4469791712168, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.98, "temperature": 77, "power": 313.691}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 272.07464131419607, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.041287422180176}, "pipe": "data"}
+{"event": "data", "data": {"rate": 303.56948214952433, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "stop", "data": null, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712078652.640405, "return_code": -15}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/dlrm.0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/dlrm.0.data
new file mode 100644
index 000000000..852327c08
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/dlrm.0.data
@@ -0,0 +1,282 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "dlrm", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "tags": ["nlp", "rl"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm", "plan": {"method": "njobs", "n": 1}, "argv": {"--num-batches": 1000, "--data-generation": "random", "--arch-mlp-bot": "512-512-64", "--arch-mlp-top": "1024-1024-1024-1", "--arch-sparse-feature-size": 64, "--arch-embedding-size": "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup": 100, "--arch-interaction-op": "dot", "--numpy-rand-seed": "727", "--print-freq": 999999, "--mini-batch-size": 16384, "--test-mini-batch-size": 16384, "--test-num-workers": 0, "--use-gpu": true}, "weight": 1.0, "name": "dlrm", "tag": ["dlrm", "0"], "job-number": 0, "devices": ["0"]}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 74, "power": 106.635, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712079069.549035, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712079069.565685}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "line", "data": "Unable to import mlperf_logging, No module named 'mlperf_logging'\n", "pipe": "stdout"}
+{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:347: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "world size: 1, current rank: 0, local rank: 0\n", "pipe": "stdout"}
+{"event": "line", "data": "Using 1 GPU(s)...\n", "pipe": "stdout"}
+{"event": "line", "data": "time/loss/accuracy (if enabled):\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 68, "power": 62.248}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 67, "power": 61.265}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3298.375, 81920.0], "load": 0, "temperature": 67, "power": 92.45}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08395528793334961}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5700.375, 81920.0], "load": 0, "temperature": 66, "power": 91.645}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08342313766479492}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5702.375, 81920.0], "load": 0, "temperature": 66, "power": 90.754}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5702.375, 81920.0], "load": 0, "temperature": 65, "power": 90.071}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0849374458193779}, "pipe": "data"}
+{"event": "data", "data": {"rate": 420915.74093591835, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6106.375, 81920.0], "load": 0, "temperature": 64, "power": 89.168}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08372959494590759}, "pipe": "data"}
+{"event": "data", "data": {"rate": 415392.1765862972, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 64, "power": 88.375}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08384181559085846}, "pipe": "data"}
+{"event": "data", "data": {"rate": 416500.3698779529, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 63, "power": 88.07}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08388040214776993}, "pipe": "data"}
+{"event": "data", "data": {"rate": 419148.13156482705, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 63, "power": 87.289}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08414746820926666}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 62, "power": 86.582}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 417720.52885295433, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 62, "power": 85.997}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08342362195253372}, "pipe": "data"}
+{"event": "data", "data": {"rate": 419205.0997282894, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 61, "power": 85.606}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08308812975883484}, "pipe": "data"}
+{"event": "data", "data": {"rate": 419477.907718752, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 61, "power": 85.227}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08345107734203339}, "pipe": "data"}
+{"event": "data", "data": {"rate": 421598.5036958574, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 61, "power": 84.532}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08459055423736572}, "pipe": "data"}
+{"event": "data", "data": {"rate": 419196.89458612987, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0.18, "temperature": 60, "power": 83.543}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 60, "power": 82.847}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08355697244405746}, "pipe": "data"}
+{"event": "data", "data": {"rate": 418015.00787158695, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 59, "power": 82.273}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08368614315986633}, "pipe": "data"}
+{"event": "data", "data": {"rate": 417431.579972925, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 59, "power": 81.664}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08400876820087433}, "pipe": "data"}
+{"event": "data", "data": {"rate": 419955.515063751, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 58, "power": 81.381}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08406656980514526}, "pipe": "data"}
+{"event": "data", "data": {"rate": 418416.9339339734, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0.18, "temperature": 58, "power": 80.99}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 58, "power": 80.502}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08438296616077423}, "pipe": "data"}
+{"event": "data", "data": {"rate": 419896.53486945486, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 57, "power": 80.272}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08476312458515167}, "pipe": "data"}
+{"event": "data", "data": {"rate": 419037.27464949555, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 57, "power": 79.5}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08354128897190094}, "pipe": "data"}
+{"event": "data", "data": {"rate": 418406.22724529885, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 56, "power": 79.306}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08408722281455994}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 56, "power": 79.874}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 414229.6689623833, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 56, "power": 78.512}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08347434550523758}, "pipe": "data"}
+{"event": "data", "data": {"rate": 416981.4260530626, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 55, "power": 77.545}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08404223620891571}, "pipe": "data"}
+{"event": "data", "data": {"rate": 419046.8561344672, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 55, "power": 77.141}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08309965580701828}, "pipe": "data"}
+{"event": "data", "data": {"rate": 418063.53958507243, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 55, "power": 77.044}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08461501449346542}, "pipe": "data"}
+{"event": "data", "data": {"rate": 417284.8534792512, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 55, "power": 78.114}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 54, "power": 76.458}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08360610902309418}, "pipe": "data"}
+{"event": "data", "data": {"rate": 418065.08638540324, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 54, "power": 76.36}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08310449123382568}, "pipe": "data"}
+{"event": "data", "data": {"rate": 418999.7996043439, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 54, "power": 76.751}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08348263800144196}, "pipe": "data"}
+{"event": "data", "data": {"rate": 419633.91909368907, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 54, "power": 76.054}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08347773551940918}, "pipe": "data"}
+{"event": "data", "data": {"rate": 418704.8521539214, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0.18, "temperature": 54, "power": 76.054}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 53, "power": 75.86}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0839482992887497}, "pipe": "data"}
+{"event": "data", "data": {"rate": 418186.6259914831, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 53, "power": 75.665}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08302360773086548}, "pipe": "data"}
+{"event": "data", "data": {"rate": 420836.14624949946, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 53, "power": 75.469}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08247873932123184}, "pipe": "data"}
+{"event": "data", "data": {"rate": 416391.08882406726, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 53, "power": 75.273}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08412615209817886}, "pipe": "data"}
+{"event": "data", "data": {"rate": 419035.20622087223, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 53, "power": 75.077}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 52, "power": 74.663}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08320017158985138}, "pipe": "data"}
+{"event": "data", "data": {"rate": 418117.43899087526, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 52, "power": 74.467}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08296072483062744}, "pipe": "data"}
+{"event": "data", "data": {"rate": 418482.41479882965, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 52, "power": 74.663}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08382801711559296}, "pipe": "data"}
+{"event": "data", "data": {"rate": 412173.5083427148, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 52, "power": 74.076}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08423185348510742}, "pipe": "data"}
+{"event": "data", "data": {"rate": 417467.37090164266, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 51, "power": 74.076}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 51, "power": 73.686}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08362126350402832}, "pipe": "data"}
+{"event": "data", "data": {"rate": 415766.4188528958, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 51, "power": 73.588}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08386819064617157}, "pipe": "data"}
+{"event": "data", "data": {"rate": 419603.59087861085, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 51, "power": 73.283}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08274102210998535}, "pipe": "data"}
+{"event": "data", "data": {"rate": 418848.9491976157, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 50, "power": 73.185}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08267556130886078}, "pipe": "data"}
+{"event": "data", "data": {"rate": 417535.1830858903, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 50, "power": 73.087}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0836000144481659}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 50, "power": 72.891}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 417403.8874303701, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 50, "power": 72.696}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08373723179101944}, "pipe": "data"}
+{"event": "data", "data": {"rate": 411420.2259345911, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 49, "power": 72.598}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0839206874370575}, "pipe": "data"}
+{"event": "data", "data": {"rate": 419378.19043481385, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 49, "power": 72.403}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08366947621107101}, "pipe": "data"}
+{"event": "data", "data": {"rate": 418089.8580852362, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 49, "power": 72.403}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08206789195537567}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0.15, "temperature": 50, "power": 72.794}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 422089.82794944674, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 49, "power": 71.988}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08495134860277176}, "pipe": "data"}
+{"event": "data", "data": {"rate": 415339.64079712157, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 48, "power": 71.988}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0824621394276619}, "pipe": "data"}
+{"event": "data", "data": {"rate": 417080.48446708685, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 48, "power": 71.792}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08413124084472656}, "pipe": "data"}
+{"event": "data", "data": {"rate": 416174.4852171312, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 48, "power": 71.89}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08249908685684204}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 50, "power": 209.78}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 414542.654247997, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 48, "power": 71.207}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08293187618255615}, "pipe": "data"}
+{"event": "data", "data": {"rate": 416572.51790449483, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 48, "power": 71.207}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08295343071222305}, "pipe": "data"}
+{"event": "data", "data": {"rate": 420874.56676622445, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 47, "power": 71.128}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08349806070327759}, "pipe": "data"}
+{"event": "data", "data": {"rate": 419082.21710658615, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 47, "power": 71.097}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08397036790847778}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 47, "power": 70.92}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 421359.3493927722, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 47, "power": 70.803}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08261168003082275}, "pipe": "data"}
+{"event": "data", "data": {"rate": 417800.16381520097, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 47, "power": 70.725}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08328827470541}, "pipe": "data"}
+{"event": "data", "data": {"rate": 417528.0433637939, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 47, "power": 70.607}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08417266607284546}, "pipe": "data"}
+{"event": "data", "data": {"rate": 422818.51170877466, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 47, "power": 70.548}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08396507054567337}, "pipe": "data"}
+{"event": "data", "data": {"rate": 420473.54036026297, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0.06, "temperature": 47, "power": 70.51}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 70.451}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08414334058761597}, "pipe": "data"}
+{"event": "data", "data": {"rate": 418449.20690926455, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 70.608}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0827147513628006}, "pipe": "data"}
+{"event": "data", "data": {"rate": 418946.26167812065, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 70.236}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08464237302541733}, "pipe": "data"}
+{"event": "data", "data": {"rate": 419338.5722330398, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 70.236}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08371055126190186}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0.11, "temperature": 46, "power": 70.236}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 419383.3646458749, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 70.282}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08391377329826355}, "pipe": "data"}
+{"event": "data", "data": {"rate": 418974.1056920486, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 69.943}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08393114805221558}, "pipe": "data"}
+{"event": "data", "data": {"rate": 415050.5790961286, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 69.864}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08339859545230865}, "pipe": "data"}
+{"event": "data", "data": {"rate": 417903.3957023033, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 45, "power": 70.158}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 45, "power": 69.253}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712079320.690505, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/focalnet.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/focalnet.D0.data
new file mode 100644
index 000000000..2383a1ad9
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/focalnet.D0.data
@@ -0,0 +1,261 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "focalnet_base_lrf"}, "tags": ["classification", "convnet", "vision"], "weight": 2.0, "name": "focalnet", "tag": ["focalnet", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.52, "memory": 0.010771942138671876}, "temperature": 72, "power": 104.302, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078779.133667, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712078779.1497157}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model focalnet_base_lrf created, param count:88749768\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.9\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.05) calculated from base learning rate (0.1) and global batch size (128) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 7.004467010498047}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4374.375, 81920.0], "load": 0.99, "temperature": 73, "power": 263.955}}}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.004 (7.00) Time: 11.523s, 11.11/s (11.523s, 11.11/s) LR: 1.000e-05 Data: 0.678 (0.678)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [21896.375, 81920.0], "load": 1.0, "temperature": 70, "power": 154.99}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [10120.375, 81920.0], "load": 0.99, "temperature": 71, "power": 228.174}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7748.375, 81920.0], "load": 1.0, "temperature": 71, "power": 193.198}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.006728649139404}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.935497760772705}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23768.375, 81920.0], "load": 0.99, "temperature": 73, "power": 302.517}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.995425701141357}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.060293197631836}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23768.375, 81920.0], "load": 0.96, "temperature": 74, "power": 318.382}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.056240081787109}, "pipe": "data"}
+{"event": "data", "data": {"rate": 368.98203453859213, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.029513359069824}, "pipe": "data"}
+{"event": "data", "data": {"rate": 395.8628346287947, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.975472450256348}, "pipe": "data"}
+{"event": "data", "data": {"rate": 388.85075726776165, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23768.375, 81920.0], "load": 0.97, "temperature": 75, "power": 308.802}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 372.5438146282206, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.069397926330566}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.005 (7.00) Time: 0.324s, 395.46/s (0.681s, 188.06/s) LR: 1.000e-05 Data: 0.000 (0.028)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 362.3881519639836, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.687 (0.687) Loss: 6.9615 (6.9615) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 1.007 (0.159) Loss: 6.8639 (6.9459) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 0.6541)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/focalnet.D0/20240402-172624-focalnet_base_lrf-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24012.375, 81920.0], "load": 0.85, "temperature": 74, "power": 237.08}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 395.5034862581757, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24012.375, 81920.0], "load": 0.94, "temperature": 75, "power": 278.374}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.020679950714111}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.021 (7.02) Time: 0.857s, 149.30/s (0.857s, 149.30/s) LR: 1.001e-02 Data: 0.531 (0.531)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23740.375, 81920.0], "load": 0.62, "temperature": 74, "power": 291.467}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 391.494881363471, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.986495018005371}, "pipe": "data"}
+{"event": "data", "data": {"rate": 392.56737588635076, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 375.91260199597957, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.065654754638672}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23840.375, 81920.0], "load": 0.95, "temperature": 75, "power": 282.157}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 365.97239663491354, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.077932357788086}, "pipe": "data"}
+{"event": "data", "data": {"rate": 380.49319210482963, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.056328773498535}, "pipe": "data"}
+{"event": "data", "data": {"rate": 395.28729500287966, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23840.375, 81920.0], "load": 0.99, "temperature": 76, "power": 293.64}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.036379337310791}, "pipe": "data"}
+{"event": "data", "data": {"rate": 384.4103988482728, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.007882595062256}, "pipe": "data"}
+{"event": "data", "data": {"rate": 368.79133166424015, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 392.34255392418385, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.197965621948242}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23840.375, 81920.0], "load": 0.99, "temperature": 76, "power": 270.557}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 372.53832206644216, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10841703414917}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 7.108 (7.05) Time: 0.326s, 393.10/s (0.347s, 368.83/s) LR: 1.001e-02 Data: 0.000 (0.023)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.564 (0.564) Loss: 6.8922 (6.8922) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.128) Loss: 6.9393 (6.9700) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/focalnet.D0/20240402-172624-focalnet_base_lrf-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 394.5168048994647, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24084.375, 81920.0], "load": 0.97, "temperature": 75, "power": 298.633}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24084.375, 81920.0], "load": 0.03, "temperature": 72, "power": 100.47}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.995872497558594}, "pipe": "data"}
+{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.996 (7.00) Time: 0.856s, 149.57/s (0.856s, 149.57/s) LR: 2.001e-02 Data: 0.532 (0.532)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 332.56051552387936, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.097558975219727}, "pipe": "data"}
+{"event": "data", "data": {"rate": 354.0308306531893, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24330.375, 81920.0], "load": 0.99, "temperature": 75, "power": 182.324}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 394.1809499198877, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0096845626831055}, "pipe": "data"}
+{"event": "data", "data": {"rate": 390.19721597659975, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.010942459106445}, "pipe": "data"}
+{"event": "data", "data": {"rate": 370.4053620304631, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24330.375, 81920.0], "load": 0.99, "temperature": 76, "power": 261.623}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.231774806976318}, "pipe": "data"}
+{"event": "data", "data": {"rate": 386.3811518975614, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.112374305725098}, "pipe": "data"}
+{"event": "data", "data": {"rate": 391.83570272658295, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 371.966992764963, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.235927104949951}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24330.375, 81920.0], "load": 0.99, "temperature": 76, "power": 254.494}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 354.64402026936494, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.224554061889648}, "pipe": "data"}
+{"event": "data", "data": {"rate": 395.2725910726848, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 7.261 (7.13) Time: 0.323s, 396.27/s (0.347s, 368.57/s) LR: 2.001e-02 Data: 0.000 (0.023)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.554 (0.554) Loss: 6.9301 (6.9301) Acc@1: 0.0000 ( 0.0000) Acc@5: 6.2500 ( 6.2500)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.127) Loss: 6.7457 (7.1150) Acc@1: 0.0000 ( 0.1696) Acc@5: 0.0000 ( 0.8479)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 395.8798593328756, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24500.375, 81920.0], "load": 0, "temperature": 72, "power": 100.83}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24574.375, 81920.0], "load": 0.94, "temperature": 77, "power": 300.303}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.089291095733643}, "pipe": "data"}
+{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 7.089 (7.09) Time: 0.846s, 151.34/s (0.846s, 151.34/s) LR: 3.000e-02 Data: 0.523 (0.523)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24818.375, 81920.0], "load": 0.99, "temperature": 76, "power": 292.251}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 344.9315660690889, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.231501579284668}, "pipe": "data"}
+{"event": "data", "data": {"rate": 387.90893584782026, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0426130294799805}, "pipe": "data"}
+{"event": "data", "data": {"rate": 394.4316636815804, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24818.375, 81920.0], "load": 0.95, "temperature": 76, "power": 284.3}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 393.9493831303574, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.210793495178223}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.230088710784912}, "pipe": "data"}
+{"event": "data", "data": {"rate": 382.9996940381012, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.394797325134277}, "pipe": "data"}
+{"event": "data", "data": {"rate": 369.8277081344691, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24818.375, 81920.0], "load": 0.99, "temperature": 77, "power": 300.569}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 393.3392875614293, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.26943826675415}, "pipe": "data"}
+{"event": "data", "data": {"rate": 388.21333522997116, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.293475151062012}, "pipe": "data"}
+{"event": "data", "data": {"rate": 371.74547905583785, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24818.375, 81920.0], "load": 0.97, "temperature": 77, "power": 277.306}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.378293037414551}, "pipe": "data"}
+{"event": "data", "data": {"rate": 355.4388159548853, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 7.239 (7.23) Time: 0.324s, 395.27/s (0.347s, 368.59/s) LR: 3.000e-02 Data: 0.000 (0.023)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.552 (0.552) Loss: 7.1712 (7.1712) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.127) Loss: 6.2444 (7.1748) Acc@1: 0.0000 ( 0.2180) Acc@5: 25.0000 ( 1.0174)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 394.31335268558576, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25062.375, 81920.0], "load": 0.93, "temperature": 77, "power": 302.752}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25062.375, 81920.0], "load": 0, "temperature": 72, "power": 100.538}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.215520858764648}, "pipe": "data"}
+{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 7.216 (7.22) Time: 0.856s, 149.47/s (0.856s, 149.47/s) LR: 4.000e-02 Data: 0.531 (0.531)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 391.40477399501464, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.295672416687012}, "pipe": "data"}
+{"event": "data", "data": {"rate": 392.74346313024085, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25306.375, 81920.0], "load": 0.96, "temperature": 77, "power": 282.309}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.311964988708496}, "pipe": "data"}
+{"event": "data", "data": {"rate": 384.63908900090854, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.187459945678711}, "pipe": "data"}
+{"event": "data", "data": {"rate": 371.23209789037753, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 357.8944851583026, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.408051490783691}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25306.375, 81920.0], "load": 0.96, "temperature": 77, "power": 326.429}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 394.0250688091481, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.321699142456055}, "pipe": "data"}
+{"event": "data", "data": {"rate": 393.6484191984159, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.464118003845215}, "pipe": "data"}
+{"event": "data", "data": {"rate": 384.6007968273487, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25306.375, 81920.0], "load": 0.96, "temperature": 77, "power": 304.821}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.423360824584961}, "pipe": "data"}
+{"event": "data", "data": {"rate": 370.5561659739286, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "line", "data": "Train: 4 [ 31/32 (100%)] Loss: 7.404 (7.33) Time: 0.323s, 395.67/s (0.348s, 368.04/s) LR: 4.000e-02 Data: 0.000 (0.024)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 384.2803640365545, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.565 (0.565) Loss: 6.9695 (6.9695) Acc@1: 0.0000 ( 0.0000) Acc@5: 4.6875 ( 4.6875)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.032 (0.128) Loss: 6.8203 (7.2576) Acc@1: 0.0000 ( 0.1211) Acc@5: 3.1250 ( 0.7025)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 395.6080105541047, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25550.375, 81920.0], "load": 0.95, "temperature": 77, "power": 336.296}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25550.375, 81920.0], "load": 0.94, "temperature": 77, "power": 305.174}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.211348533630371}, "pipe": "data"}
+{"event": "line", "data": "Train: 5 [ 0/32 ( 0%)] Loss: 7.211 (7.21) Time: 0.862s, 148.43/s (0.862s, 148.43/s) LR: 4.997e-02 Data: 0.539 (0.539)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25794.375, 81920.0], "load": 0.94, "temperature": 77, "power": 297.928}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 370.3252794814747, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.400300025939941}, "pipe": "data"}
+{"event": "data", "data": {"rate": 371.0778671168573, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.457738399505615}, "pipe": "data"}
+{"event": "data", "data": {"rate": 395.0807654349073, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.70676326751709}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25794.375, 81920.0], "load": 0.96, "temperature": 77, "power": 308.062}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 394.78057517340005, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.551183223724365}, "pipe": "data"}
+{"event": "data", "data": {"rate": 380.9927983163714, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 370.98387457628, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.540083885192871}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25794.375, 81920.0], "load": 0.96, "temperature": 77, "power": 289.287}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 393.4815756110184, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.490669250488281}, "pipe": "data"}
+{"event": "data", "data": {"rate": 383.8043983102495, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.45649528503418}, "pipe": "data"}
+{"event": "data", "data": {"rate": 371.53194665432846, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25794.375, 81920.0], "load": 0.95, "temperature": 78, "power": 307.089}}}, "pipe": "data"}
+{"event": "line", "data": "Train: 5 [ 31/32 (100%)] Loss: 7.408 (7.45) Time: 0.324s, 394.75/s (0.348s, 368.20/s) LR: 4.997e-02 Data: 0.000 (0.024)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 365.4343842059815, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.543 (0.543) Loss: 7.3016 (7.3016) Acc@1: 0.0000 ( 0.0000) Acc@5: 6.2500 ( 6.2500)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.127) Loss: 6.8042 (7.2021) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 394.872320086785, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [26038.375, 81920.0], "load": 0.93, "temperature": 77, "power": 304.187}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [26038.375, 81920.0], "load": 0, "temperature": 73, "power": 101.22}}}, "pipe": "data"}
+{"event": "stop", "data": null, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712078894.5490353, "return_code": -15}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/fp16.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/fp16.D0.data
new file mode 100644
index 000000000..9f434d4b2
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/fp16.D0.data
@@ -0,0 +1,141 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 30, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp16"}, "weight": 0.0, "name": "fp16", "tag": ["fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 70, "power": 101.359, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077244.530782, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712077244.5408697}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 222.54486758004043, "units": "Tflops", "t": 1712077246.5508718}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 67, "power": 61.167}}, "t": 1712077245.8868654}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 0.13, "temperature": 69, "power": 303.48}}, "t": 1712077246.4000347}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 254.5317042014051, "units": "Tflops", "t": 1712077246.8107204}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 254.163936526009, "units": "Tflops", "t": 1712077247.070349}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 243.726}}, "t": 1712077246.9122415}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 251.03918272948692, "units": "Tflops", "t": 1712077247.3332746}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 250.60945854785086, "units": "Tflops", "t": 1712077247.5965824}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 247.759}}, "t": 1712077247.419552}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 253.00843885729108, "units": "Tflops", "t": 1712077247.857433}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 253.127405803192, "units": "Tflops", "t": 1712077248.1181223}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 297.768}}, "t": 1712077247.92666}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 254.47950202942943, "units": "Tflops", "t": 1712077248.3774605}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 253.96494904260413, "units": "Tflops", "t": 1712077248.6372814}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 276.781}}, "t": 1712077248.4408026}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.09079704420296, "units": "Tflops", "t": 1712077248.8990853}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.89767339288477, "units": "Tflops", "t": 1712077249.1600091}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 251.404}}, "t": 1712077248.9483213}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 250.94265082627828, "units": "Tflops", "t": 1712077249.423001}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.9265694321592, "units": "Tflops", "t": 1712077249.6838865}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 250.437}}, "t": 1712077249.4564295}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 254.56846925194102, "units": "Tflops", "t": 1712077249.9431415}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 254.40439657648997, "units": "Tflops", "t": 1712077250.2025096}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 287.727}}, "t": 1712077249.9643905}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 254.68210992763017, "units": "Tflops", "t": 1712077250.4616623}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.38350557635073, "units": "Tflops", "t": 1712077250.7231083}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 292.192}}, "t": 1712077250.4718866}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 250.82913875918806, "units": "Tflops", "t": 1712077250.9862187}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 259.748}}, "t": 1712077250.9802568}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 250.80981327178375, "units": "Tflops", "t": 1712077251.249353}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 255.00765951103918, "units": "Tflops", "t": 1712077251.5081162}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 235.4}}, "t": 1712077251.488796}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 251.9757852064729, "units": "Tflops", "t": 1712077251.7700298}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 253.13458442981218, "units": "Tflops", "t": 1712077252.0307195}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 287.642}}, "t": 1712077251.9969237}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 251.5819602813517, "units": "Tflops", "t": 1712077252.2930512}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 251.84712398130065, "units": "Tflops", "t": 1712077252.5550547}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 298.546}}, "t": 1712077252.505392}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 253.35082863378213, "units": "Tflops", "t": 1712077252.815552}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.3862680424351, "units": "Tflops", "t": 1712077253.0769968}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 266.222}}, "t": 1712077253.0137498}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 257.74672471670175, "units": "Tflops", "t": 1712077253.3330739}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 253.89107880375622, "units": "Tflops", "t": 1712077253.592973}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 240.606}}, "t": 1712077253.521845}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 251.19643361174224, "units": "Tflops", "t": 1712077253.855737}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 250.8511961895104, "units": "Tflops", "t": 1712077254.118787}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 288.795}}, "t": 1712077254.030125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 251.38151241243773, "units": "Tflops", "t": 1712077254.3813171}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 253.09776933317048, "units": "Tflops", "t": 1712077254.6420221}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 300.197}}, "t": 1712077254.5415034}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 254.3529480326505, "units": "Tflops", "t": 1712077254.9014895}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 253.41161961641615, "units": "Tflops", "t": 1712077255.1618736}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 260.432}}, "t": 1712077255.0513716}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.18867416543696, "units": "Tflops", "t": 1712077255.4235737}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 250.58721666931706, "units": "Tflops", "t": 1712077255.686902}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 229.698}}, "t": 1712077255.5596206}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 253.21102731285686, "units": "Tflops", "t": 1712077255.947558}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.4267911604863, "units": "Tflops", "t": 1712077256.2089658}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 295.812}}, "t": 1712077256.0671718}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 254.88151891581694, "units": "Tflops", "t": 1712077256.4679122}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 250.43752719153935, "units": "Tflops", "t": 1712077256.7313833}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 280.685}}, "t": 1712077256.576084}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 250.58721666931706, "units": "Tflops", "t": 1712077256.9947474}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.92148325043715, "units": "Tflops", "t": 1712077257.255644}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 238.083}}, "t": 1712077257.0844693}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 253.40396113113053, "units": "Tflops", "t": 1712077257.5160992}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 255.48704944225506, "units": "Tflops", "t": 1712077257.774371}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 288.596}}, "t": 1712077257.5939236}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 251.82512025180884, "units": "Tflops", "t": 1712077258.036483}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 249.4776599018352, "units": "Tflops", "t": 1712077258.3009775}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 287.026}}, "t": 1712077258.1029863}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 251.67303299353517, "units": "Tflops", "t": 1712077258.5632377}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 251.13442134102067, "units": "Tflops", "t": 1712077258.825992}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 250.509}}, "t": 1712077258.61151}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 251.54468068017985, "units": "Tflops", "t": 1712077259.0883808}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 255.59206781672756, "units": "Tflops", "t": 1712077259.3465555}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 226.252}}, "t": 1712077259.1199312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.7154338482392, "units": "Tflops", "t": 1712077259.6077058}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 255.2793910091135, "units": "Tflops", "t": 1712077259.8661869}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 241.527}}, "t": 1712077259.6283953}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 253.17650647682925, "units": "Tflops", "t": 1712077260.126893}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.52999965834513, "units": "Tflops", "t": 1712077260.3881924}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 246.911}}, "t": 1712077260.1375098}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 250.89668767496119, "units": "Tflops", "t": 1712077260.6512637}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 251.33036598129544, "units": "Tflops", "t": 1712077260.9137995}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 236.858}}, "t": 1712077260.6518612}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 254.9007725346199, "units": "Tflops", "t": 1712077261.1727145}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 249.935}}, "t": 1712077261.162053}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 255.4427080558217, "units": "Tflops", "t": 1712077261.4310744}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.6406738873113, "units": "Tflops", "t": 1712077261.692263}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 288.801}}, "t": 1712077261.6722648}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 251.36895220888815, "units": "Tflops", "t": 1712077261.9548306}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 251.42239885733068, "units": "Tflops", "t": 1712077262.2172825}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 255.828}}, "t": 1712077262.1897674}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 251.65083080408175, "units": "Tflops", "t": 1712077262.479536}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 255.41677076431165, "units": "Tflops", "t": 1712077262.7378848}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 243.536}}, "t": 1712077262.6980882}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.99964807470843, "units": "Tflops", "t": 1712077262.998758}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 253.71531473646385, "units": "Tflops", "t": 1712077263.2588427}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 262.094}}, "t": 1712077263.206447}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 253.99432265464102, "units": "Tflops", "t": 1712077263.5186732}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 254.96489396531246, "units": "Tflops", "t": 1712077263.7774792}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 282.554}}, "t": 1712077263.71479}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 253.8442622040078, "units": "Tflops", "t": 1712077264.0374675}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.42610031650585, "units": "Tflops", "t": 1712077264.2988775}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.512}}, "t": 1712077264.2248616}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.52746451045132, "units": "Tflops", "t": 1712077264.5602367}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.48967379569982, "units": "Tflops", "t": 1712077264.8215787}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.308}}, "t": 1712077264.7338543}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.528616844093, "units": "Tflops", "t": 1712077265.082918}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.48253169992324, "units": "Tflops", "t": 1712077265.3442695}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.318}}, "t": 1712077265.2411833}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.467327293404, "units": "Tflops", "t": 1712077265.605692}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.5028070908945, "units": "Tflops", "t": 1712077265.8670201}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.512}}, "t": 1712077265.748522}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.4479788787993, "units": "Tflops", "t": 1712077266.1284456}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.43001514906106, "units": "Tflops", "t": 1712077266.3898485}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.413}}, "t": 1712077266.2559}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.44176037533407, "units": "Tflops", "t": 1712077266.6512785}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.42610031650585, "units": "Tflops", "t": 1712077266.912686}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.319}}, "t": 1712077266.76545}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.43945730364706, "units": "Tflops", "t": 1712077267.1741214}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.44360286294048, "units": "Tflops", "t": 1712077267.4355028}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 265.935}}, "t": 1712077267.2728167}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.40675822019162, "units": "Tflops", "t": 1712077267.6969702}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.356114393945, "units": "Tflops", "t": 1712077267.9584403}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.704}}, "t": 1712077267.7816741}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.41850128228725, "units": "Tflops", "t": 1712077268.2198966}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.44452411682946, "units": "Tflops", "t": 1712077268.481275}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.702}}, "t": 1712077268.2890494}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.4201131604045, "units": "Tflops", "t": 1712077268.742742}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.35979689603613, "units": "Tflops", "t": 1712077269.0042107}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.32}}, "t": 1712077268.7991788}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.3176846923445, "units": "Tflops", "t": 1712077269.265771}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.4079094524799, "units": "Tflops", "t": 1712077269.5271878}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 267.004}}, "t": 1712077269.3065293}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 252.38557742024466, "units": "Tflops", "t": 1712077269.7886782}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712077270.259918, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/fp32.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/fp32.D0.data
new file mode 100644
index 000000000..c95000a56
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/fp32.D0.data
@@ -0,0 +1,301 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32"}, "weight": 0.0, "name": "fp32", "tag": ["fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 71, "power": 99.9, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077305.368661, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712077305.3792894}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 18.68710130815531, "units": "Tflops", "t": 1712077308.2750032}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 67, "power": 63.246}}, "t": 1712077306.7332208}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 0.24, "temperature": 72, "power": 236.778}}, "t": 1712077307.2411919}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 294.146}}, "t": 1712077307.7487762}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 288.015}}, "t": 1712077308.255338}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.994057850676803, "units": "Tflops", "t": 1712077309.433464}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 282.447}}, "t": 1712077308.7629755}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 283.422}}, "t": 1712077309.2695653}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.994527244925973, "units": "Tflops", "t": 1712077310.591394}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 293.064}}, "t": 1712077309.8030024}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 288.106}}, "t": 1712077310.3131642}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.994984926656688, "units": "Tflops", "t": 1712077311.7492683}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 287.903}}, "t": 1712077310.8223195}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 295.518}}, "t": 1712077311.3314729}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.99354545510755, "units": "Tflops", "t": 1712077312.9072163}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 284.103}}, "t": 1712077311.8417897}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 287.236}}, "t": 1712077312.348321}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 297.354}}, "t": 1712077312.8566823}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.9994964231482, "units": "Tflops", "t": 1712077314.064821}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 288.304}}, "t": 1712077313.3668628}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 304.265}}, "t": 1712077313.877035}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 19.002005476128602, "units": "Tflops", "t": 1712077315.2222967}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 289.368}}, "t": 1712077314.3891506}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 302.015}}, "t": 1712077314.8957286}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 19.002326494834936, "units": "Tflops", "t": 1712077316.3796988}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 301.92}}, "t": 1712077315.4022624}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 298.816}}, "t": 1712077315.9100757}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.999977828117547, "units": "Tflops", "t": 1712077317.537271}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 291.218}}, "t": 1712077316.4191806}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 303.763}}, "t": 1712077316.926771}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 295.313}}, "t": 1712077317.4343998}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.99907765980432, "units": "Tflops", "t": 1712077318.6949356}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 298.623}}, "t": 1712077317.9418545}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 296.472}}, "t": 1712077318.4494684}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 19.000169614128104, "units": "Tflops", "t": 1712077319.852467}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 304.26}}, "t": 1712077318.9560473}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 293.753}}, "t": 1712077319.4626508}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.98960758148448, "units": "Tflops", "t": 1712077321.0106518}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 301.829}}, "t": 1712077319.9703536}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 303.276}}, "t": 1712077320.4795644}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 270.131}}, "t": 1712077320.991699}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 19.014223576345586, "units": "Tflops", "t": 1712077322.1673636}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 302.707}}, "t": 1712077321.5008812}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 293.288}}, "t": 1712077322.0100975}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.976882256425043, "units": "Tflops", "t": 1712077323.3263178}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 302.902}}, "t": 1712077322.5178225}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 303.479}}, "t": 1712077323.027113}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.991163765371457, "units": "Tflops", "t": 1712077324.4844277}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 297.947}}, "t": 1712077323.5346892}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 300.757}}, "t": 1712077324.0412679}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.976132631407506, "units": "Tflops", "t": 1712077325.6434276}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 304.074}}, "t": 1712077324.548626}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 295.607}}, "t": 1712077325.0560722}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 303.777}}, "t": 1712077325.5645163}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.98503434655105, "units": "Tflops", "t": 1712077326.8019874}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 306.027}}, "t": 1712077326.0718877}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 306.208}}, "t": 1712077326.583812}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.98189299001999, "units": "Tflops", "t": 1712077327.9606433}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 290.818}}, "t": 1712077327.0939293}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 303.884}}, "t": 1712077327.6030962}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.984788157935984, "units": "Tflops", "t": 1712077329.11915}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 301.936}}, "t": 1712077328.1097026}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 293.646}}, "t": 1712077328.6189256}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.988825675807085, "units": "Tflops", "t": 1712077330.2773755}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 290.836}}, "t": 1712077329.1254778}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 305.819}}, "t": 1712077329.6320713}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 303.585}}, "t": 1712077330.1386988}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.969415979972116, "units": "Tflops", "t": 1712077331.436827}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 303.58}}, "t": 1712077330.6472082}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 282.043}}, "t": 1712077331.1538332}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.993811427982887, "units": "Tflops", "t": 1712077332.5947883}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 304.167}}, "t": 1712077331.6604373}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 287.905}}, "t": 1712077332.1673846}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.97320497610451, "units": "Tflops", "t": 1712077333.7539961}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 302.21}}, "t": 1712077332.6750336}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 289.371}}, "t": 1712077333.1827157}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 295.312}}, "t": 1712077333.6903963}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.997680610963616, "units": "Tflops", "t": 1712077334.9117212}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 309.114}}, "t": 1712077334.1995592}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 288.107}}, "t": 1712077334.7088869}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.979108054642264, "units": "Tflops", "t": 1712077336.0705419}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 307.374}}, "t": 1712077335.216628}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 282.631}}, "t": 1712077335.724383}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.979108054642264, "units": "Tflops", "t": 1712077337.2293873}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 306.107}}, "t": 1712077336.2321973}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 306.688}}, "t": 1712077336.7402503}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.97066060167822, "units": "Tflops", "t": 1712077338.3887339}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 298.218}}, "t": 1712077337.2468696}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 304.546}}, "t": 1712077337.7548234}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 300.747}}, "t": 1712077338.2634094}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.957785300498017, "units": "Tflops", "t": 1712077339.5489593}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 303.78}}, "t": 1712077338.7711053}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 294.929}}, "t": 1712077339.2785685}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.97774127457208, "units": "Tflops", "t": 1712077340.7078683}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 284.491}}, "t": 1712077339.785947}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 289.471}}, "t": 1712077340.292524}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.922908796930226, "units": "Tflops", "t": 1712077341.8701537}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 304.36}}, "t": 1712077340.8031745}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 308.882}}, "t": 1712077341.309846}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 281.482}}, "t": 1712077341.81746}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.99115985504207, "units": "Tflops", "t": 1712077343.0283046}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 305.432}}, "t": 1712077342.3276296}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 298.242}}, "t": 1712077342.8380604}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.947768548783912, "units": "Tflops", "t": 1712077344.1890483}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 294.252}}, "t": 1712077343.3446116}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 289.668}}, "t": 1712077343.8523786}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.963986814063126, "units": "Tflops", "t": 1712077345.348785}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 290.917}}, "t": 1712077344.3638182}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 296.182}}, "t": 1712077344.870451}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.880922208446094, "units": "Tflops", "t": 1712077346.5136507}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 306.877}}, "t": 1712077345.3771138}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 288.976}}, "t": 1712077345.8848608}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 295.606}}, "t": 1712077346.391467}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.88866323884558, "units": "Tflops", "t": 1712077347.6780813}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 309.89}}, "t": 1712077346.8993382}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 301.147}}, "t": 1712077347.407996}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.884834463596054, "units": "Tflops", "t": 1712077348.8426907}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 279.33}}, "t": 1712077347.9171476}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 303.093}}, "t": 1712077348.4256394}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.882309868367123, "units": "Tflops", "t": 1712077350.0074513}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 308.062}}, "t": 1712077348.9322577}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.549}}, "t": 1712077349.4425132}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 301.928}}, "t": 1712077349.955026}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.89792446104727, "units": "Tflops", "t": 1712077351.1713152}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 300.084}}, "t": 1712077350.4615912}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 307.856}}, "t": 1712077350.970735}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.894076448898378, "units": "Tflops", "t": 1712077352.335372}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.89}}, "t": 1712077351.483254}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.637}}, "t": 1712077351.9938357}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.881598618861094, "units": "Tflops", "t": 1712077353.5001707}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 289.472}}, "t": 1712077352.500473}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 287.719}}, "t": 1712077353.0082679}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.89564411174342, "units": "Tflops", "t": 1712077354.6641371}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 295.016}}, "t": 1712077353.5165305}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 287.129}}, "t": 1712077354.0231462}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.001}}, "t": 1712077354.5310075}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.919702582815287, "units": "Tflops", "t": 1712077355.826679}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 286.554}}, "t": 1712077355.0385358}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 288.775}}, "t": 1712077355.5454686}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.860224315467267, "units": "Tflops", "t": 1712077356.9928074}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 293.265}}, "t": 1712077356.053057}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 287.804}}, "t": 1712077356.5596998}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.895106045052607, "units": "Tflops", "t": 1712077358.1567872}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 284.117}}, "t": 1712077357.067402}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.488}}, "t": 1712077357.5739653}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 288.88}}, "t": 1712077358.0806093}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.930971829754103, "units": "Tflops", "t": 1712077359.3186383}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.815}}, "t": 1712077358.5872436}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.674}}, "t": 1712077359.0961425}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.93936452063713, "units": "Tflops", "t": 1712077360.4799504}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 293.362}}, "t": 1712077359.6057162}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 285.866}}, "t": 1712077360.1133754}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.92450066274071, "units": "Tflops", "t": 1712077361.6421306}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.451}}, "t": 1712077360.6219945}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 300.556}}, "t": 1712077361.1305969}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.168}}, "t": 1712077361.637259}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.86843472129579, "units": "Tflops", "t": 1712077362.807823}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.642}}, "t": 1712077362.143834}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.826}}, "t": 1712077362.6511762}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.894273843978016, "units": "Tflops", "t": 1712077363.9718525}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 284.985}}, "t": 1712077363.1587524}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.881}}, "t": 1712077363.666399}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.90244419844568, "units": "Tflops", "t": 1712077365.1353765}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.23}}, "t": 1712077364.176579}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 314.376}}, "t": 1712077364.6857278}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.923064089122487, "units": "Tflops", "t": 1712077366.2976499}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 291.037}}, "t": 1712077365.1948285}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 284.288}}, "t": 1712077365.7039764}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.421}}, "t": 1712077366.213254}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.928065860475584, "units": "Tflops", "t": 1712077367.459626}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 287.531}}, "t": 1712077366.7251267}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.519}}, "t": 1712077367.236897}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.928364963178655, "units": "Tflops", "t": 1712077368.6215801}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 293.843}}, "t": 1712077367.7461922}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.013}}, "t": 1712077368.252814}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.866335140501327, "units": "Tflops", "t": 1712077369.787351}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.29}}, "t": 1712077368.760692}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.891}}, "t": 1712077369.2688613}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 287.116}}, "t": 1712077369.775489}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.915946561796286, "units": "Tflops", "t": 1712077370.9500737}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 284.206}}, "t": 1712077370.2832644}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.041}}, "t": 1712077370.7909024}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.893449456591526, "units": "Tflops", "t": 1712077372.114177}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 287.23}}, "t": 1712077371.2983148}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 288.283}}, "t": 1712077371.8061888}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.881931044026054, "units": "Tflops", "t": 1712077373.2789934}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 302.416}}, "t": 1712077372.312764}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 295.898}}, "t": 1712077372.8229253}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.872032883553587, "units": "Tflops", "t": 1712077374.4444017}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 315.529}}, "t": 1712077373.3295374}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 286.341}}, "t": 1712077373.8387468}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 285.381}}, "t": 1712077374.3480346}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.852583641765843, "units": "Tflops", "t": 1712077375.6110356}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 291.027}}, "t": 1712077374.8574302}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 283.505}}, "t": 1712077375.365246}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.822328582714757, "units": "Tflops", "t": 1712077376.7795346}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.131}}, "t": 1712077375.8745782}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 291.706}}, "t": 1712077376.3811822}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.886659706251443, "units": "Tflops", "t": 1712077377.9440253}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 286.632}}, "t": 1712077376.8878233}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 293.558}}, "t": 1712077377.3952458}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 280.486}}, "t": 1712077377.9026494}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.943188207051758, "units": "Tflops", "t": 1712077379.1051114}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.052}}, "t": 1712077378.4092185}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 290.042}}, "t": 1712077378.9157865}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.856491870762856, "units": "Tflops", "t": 1712077380.271498}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.284}}, "t": 1712077379.4224355}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 287.812}}, "t": 1712077379.9319508}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.83273991092385, "units": "Tflops", "t": 1712077381.4393294}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.142}}, "t": 1712077380.4425433}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.492}}, "t": 1712077380.9491093}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.84636234078353, "units": "Tflops", "t": 1712077382.6063101}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 284.79}}, "t": 1712077381.4568737}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 284.986}}, "t": 1712077381.9647448}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 293.956}}, "t": 1712077382.471332}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.89598864535747, "units": "Tflops", "t": 1712077383.7703235}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 303.579}}, "t": 1712077382.9798634}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 283.312}}, "t": 1712077383.4868112}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.853631843987383, "units": "Tflops", "t": 1712077384.9368927}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.892}}, "t": 1712077383.994443}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.797}}, "t": 1712077384.502745}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.858080292669314, "units": "Tflops", "t": 1712077386.1031506}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 283.119}}, "t": 1712077385.010339}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 284.305}}, "t": 1712077385.5190766}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.3}}, "t": 1712077386.0293517}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.83426664360845, "units": "Tflops", "t": 1712077387.2709444}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 286.747}}, "t": 1712077386.5359921}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.33}}, "t": 1712077387.046058}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.91422425761594, "units": "Tflops", "t": 1712077388.433765}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 285.867}}, "t": 1712077387.5526981}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 305.742}}, "t": 1712077388.0605266}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.862314821391486, "units": "Tflops", "t": 1712077389.5997648}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.547}}, "t": 1712077388.5683527}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 282.626}}, "t": 1712077389.0761986}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.406}}, "t": 1712077389.5844991}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.85469943443248, "units": "Tflops", "t": 1712077390.7663093}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 288.006}}, "t": 1712077390.0922668}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 309.122}}, "t": 1712077390.599741}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.898547878122248, "units": "Tflops", "t": 1712077391.930084}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 288.389}}, "t": 1712077391.106807}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 281.945}}, "t": 1712077391.613375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.93784403273525, "units": "Tflops", "t": 1712077393.0914242}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.522}}, "t": 1712077392.1207025}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.242}}, "t": 1712077392.6343484}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.884846063600598, "units": "Tflops", "t": 1712077394.2560513}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 300.459}}, "t": 1712077393.1409867}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 307.47}}, "t": 1712077393.6502435}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 291.611}}, "t": 1712077394.1608934}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.86032844409441, "units": "Tflops", "t": 1712077395.4221985}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.614}}, "t": 1712077394.67172}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 293.063}}, "t": 1712077395.1783574}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.924380292795476, "units": "Tflops", "t": 1712077396.584362}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.186}}, "t": 1712077395.6873908}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 281.274}}, "t": 1712077396.1950583}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.883357512776975, "units": "Tflops", "t": 1712077397.7490659}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 303.173}}, "t": 1712077396.7038152}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 290.832}}, "t": 1712077397.2129517}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 300.962}}, "t": 1712077397.719542}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.923238795885464, "units": "Tflops", "t": 1712077398.9113703}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.208}}, "t": 1712077398.226309}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 306.607}}, "t": 1712077398.7329216}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.96719635480691, "units": "Tflops", "t": 1712077400.0709205}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 289.368}}, "t": 1712077399.2398987}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 304.161}}, "t": 1712077399.7465804}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.921418121557824, "units": "Tflops", "t": 1712077401.233293}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 291.611}}, "t": 1712077400.2571023}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.824}}, "t": 1712077400.7637312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.893348832071833, "units": "Tflops", "t": 1712077402.3973691}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 279.705}}, "t": 1712077401.2721446}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 291.709}}, "t": 1712077401.780039}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.042}}, "t": 1712077402.2889624}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.972166851357798, "units": "Tflops", "t": 1712077403.5566478}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 280.987}}, "t": 1712077402.7965493}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.221}}, "t": 1712077403.3041596}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.987332414894, "units": "Tflops", "t": 1712077404.71496}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 308.445}}, "t": 1712077403.812741}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 290.45}}, "t": 1712077404.3193288}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.950127685495993, "units": "Tflops", "t": 1712077405.875577}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 309.301}}, "t": 1712077404.826258}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 290.437}}, "t": 1712077405.3343413}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 279.138}}, "t": 1712077405.8409684}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.96712224623274, "units": "Tflops", "t": 1712077407.0351598}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 304.159}}, "t": 1712077406.3485608}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 294.442}}, "t": 1712077406.8576987}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.993216910663154, "units": "Tflops", "t": 1712077408.1931806}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 295.401}}, "t": 1712077407.3710613}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 305.238}}, "t": 1712077407.8802004}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.973099597359415, "units": "Tflops", "t": 1712077409.352403}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 294.723}}, "t": 1712077408.3894868}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 285.177}}, "t": 1712077408.8994203}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.99138274638699, "units": "Tflops", "t": 1712077410.5104814}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 299.889}}, "t": 1712077409.4103992}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 293.65}}, "t": 1712077409.9209597}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 302.511}}, "t": 1712077410.4275205}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 18.991930221020205, "units": "Tflops", "t": 1712077411.6685834}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 294.722}}, "t": 1712077410.9340382}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 303.093}}, "t": 1712077411.4418168}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712077412.4395995, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/llama.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/llama.D0.data
new file mode 100644
index 000000000..2e7a034ae
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/llama.D0.data
@@ -0,0 +1,631 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/llm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama", "plan": {"method": "per_gpu"}, "tags": ["llm", "nlp"], "weight": 1.0, "name": "llama", "tag": ["llama", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 36, "power": 45.357, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712076924.998052, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712076925.0079544}, "pipe": null}
+{"event": "line", "data": "Dataset\n", "pipe": "stderr"}
+{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.\n", "pipe": "stderr"}
+{"event": "line", "data": " table = cls._concat_blocks(blocks, axis=0)\n", "pipe": "stderr"}
+{"event": "line", "data": "Tokenizer\n", "pipe": "stderr"}
+{"event": "line", "data": "Model\n", "pipe": "stderr"}
+{"event": "line", "data": "Pipeline\n", "pipe": "stderr"}
+{"event": "line", "data": "Starting\n", "pipe": "stderr"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =10.143764019012451, total / elapsed =200.41870021715306 in_token_count =9 out_token_count =2024\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 200.41870021715306, "units": "Tok/s", "t": 1712076998.716492}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27230.375, 81920.0], "load": 0, "temperature": 35, "power": 62.838}}, "t": 1712076988.6630177}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27252.375, 81920.0], "load": 0.18, "temperature": 36, "power": 63.025}}, "t": 1712076989.1705136}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27278.375, 81920.0], "load": 0.53, "temperature": 37, "power": 217.038}}, "t": 1712076989.6808262}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27328.375, 81920.0], "load": 0.96, "temperature": 38, "power": 219.719}}, "t": 1712076990.1933587}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27374.375, 81920.0], "load": 0.97, "temperature": 39, "power": 222.577}}, "t": 1712076990.7016249}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27554.375, 81920.0], "load": 0.96, "temperature": 39, "power": 224.342}}, "t": 1712076991.2115796}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27614.375, 81920.0], "load": 0.97, "temperature": 40, "power": 223.806}}, "t": 1712076991.7192144}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27654.375, 81920.0], "load": 0.96, "temperature": 40, "power": 224.837}}, "t": 1712076992.2300007}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27714.375, 81920.0], "load": 0.96, "temperature": 40, "power": 221.056}}, "t": 1712076992.7383416}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27754.375, 81920.0], "load": 0.96, "temperature": 40, "power": 223.825}}, "t": 1712076993.2484715}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27814.375, 81920.0], "load": 0.96, "temperature": 40, "power": 227.214}}, "t": 1712076993.7567909}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27834.375, 81920.0], "load": 0.96, "temperature": 41, "power": 218.945}}, "t": 1712076994.2655935}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27914.375, 81920.0], "load": 0.96, "temperature": 41, "power": 216.4}}, "t": 1712076994.7731984}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27914.375, 81920.0], "load": 0.96, "temperature": 41, "power": 222.327}}, "t": 1712076995.2866879}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28034.375, 81920.0], "load": 0.96, "temperature": 41, "power": 222.858}}, "t": 1712076995.7954605}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28034.375, 81920.0], "load": 0.97, "temperature": 42, "power": 224.106}}, "t": 1712076996.3050852}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28034.375, 81920.0], "load": 0.97, "temperature": 42, "power": 222.23}}, "t": 1712076996.8133888}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28254.375, 81920.0], "load": 0.97, "temperature": 42, "power": 224.239}}, "t": 1712076997.3242602}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28254.375, 81920.0], "load": 0.96, "temperature": 42, "power": 231.487}}, "t": 1712076997.8326375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28254.375, 81920.0], "load": 0.96, "temperature": 43, "power": 227.303}}, "t": 1712076998.343296}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =5.301281929016113, total / elapsed =380.66264481325726 in_token_count =185 out_token_count =1833\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 380.66264481325726, "units": "Tok/s", "t": 1712077004.0177922}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 44, "power": 296.968}}, "t": 1712076998.8758256}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 43, "power": 225.913}}, "t": 1712076999.3872433}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 43, "power": 227.986}}, "t": 1712076999.8955493}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 43, "power": 231.035}}, "t": 1712077000.4068785}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 44, "power": 230.428}}, "t": 1712077000.914623}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 44, "power": 232.889}}, "t": 1712077001.4255269}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 44, "power": 230.945}}, "t": 1712077001.9338295}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 44, "power": 232.59}}, "t": 1712077002.4444304}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 44, "power": 232.228}}, "t": 1712077002.9519699}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 45, "power": 229.549}}, "t": 1712077003.464068}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 45, "power": 229.698}}, "t": 1712077003.9715762}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =6.773216962814331, total / elapsed =308.2730128775349 in_token_count =121 out_token_count =1967\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 308.2730128775349, "units": "Tok/s", "t": 1712077010.7910252}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.98, "temperature": 45, "power": 229.169}}, "t": 1712077004.4843094}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 45, "power": 233.386}}, "t": 1712077004.9926116}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 45, "power": 228.655}}, "t": 1712077005.5009358}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.95, "temperature": 45, "power": 224.538}}, "t": 1712077006.0085108}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 46, "power": 226.02}}, "t": 1712077006.519377}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 46, "power": 226.361}}, "t": 1712077007.0276918}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 46, "power": 226.227}}, "t": 1712077007.5377593}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 46, "power": 229.571}}, "t": 1712077008.0461383}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 46, "power": 226.718}}, "t": 1712077008.5587854}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 47, "power": 229.627}}, "t": 1712077009.0674236}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 47, "power": 223.125}}, "t": 1712077009.5787327}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 47, "power": 232.784}}, "t": 1712077010.08634}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 47, "power": 229.86}}, "t": 1712077010.596971}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =6.612996339797974, total / elapsed =311.20537412287024 in_token_count =127 out_token_count =1931\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 311.20537412287024, "units": "Tok/s", "t": 1712077017.4040391}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.95, "temperature": 48, "power": 230.971}}, "t": 1712077011.1051986}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 48, "power": 235.344}}, "t": 1712077011.61562}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 48, "power": 226.455}}, "t": 1712077012.1239812}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 48, "power": 231.862}}, "t": 1712077012.6323142}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 48, "power": 233.267}}, "t": 1712077013.1406302}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 48, "power": 235.787}}, "t": 1712077013.6531157}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 48, "power": 233.89}}, "t": 1712077014.161289}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 48, "power": 230.169}}, "t": 1712077014.6751626}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 49, "power": 236.538}}, "t": 1712077015.187888}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 49, "power": 236.161}}, "t": 1712077015.696339}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 49, "power": 233.52}}, "t": 1712077016.2080505}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 49, "power": 237.084}}, "t": 1712077016.7164338}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 49, "power": 232.244}}, "t": 1712077017.227582}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =9.362201690673828, total / elapsed =212.6636517544092 in_token_count =6 out_token_count =1985\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 212.6636517544092, "units": "Tok/s", "t": 1712077026.7662604}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 49, "power": 236.25}}, "t": 1712077017.7361279}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 49, "power": 232.235}}, "t": 1712077018.2455752}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.94, "temperature": 50, "power": 233.598}}, "t": 1712077018.7532315}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 50, "power": 232.253}}, "t": 1712077019.2651622}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 50, "power": 236.93}}, "t": 1712077019.7735038}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 50, "power": 236.155}}, "t": 1712077020.2863476}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 50, "power": 235.889}}, "t": 1712077020.7939498}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 50, "power": 228.61}}, "t": 1712077021.303867}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 50, "power": 237.99}}, "t": 1712077021.8151917}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 51, "power": 238.11}}, "t": 1712077022.325006}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 51, "power": 236.946}}, "t": 1712077022.8332937}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 51, "power": 230.374}}, "t": 1712077023.3424761}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 51, "power": 231.61}}, "t": 1712077023.8506193}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 51, "power": 231.967}}, "t": 1712077024.3609152}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 51, "power": 228.327}}, "t": 1712077024.8693233}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 51, "power": 241.374}}, "t": 1712077025.3818033}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 52, "power": 238.242}}, "t": 1712077025.890193}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 52, "power": 236.543}}, "t": 1712077026.4031544}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =3.6670026779174805, total / elapsed =597.2179985545356 in_token_count =256 out_token_count =1934\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 597.2179985545356, "units": "Tok/s", "t": 1712077030.4332798}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 54, "power": 303.674}}, "t": 1712077026.913884}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 52, "power": 232.031}}, "t": 1712077027.4261255}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.96, "temperature": 52, "power": 228.938}}, "t": 1712077027.9345155}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 52, "power": 234.831}}, "t": 1712077028.4460652}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.96, "temperature": 52, "power": 238.455}}, "t": 1712077028.9543102}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.96, "temperature": 53, "power": 241.228}}, "t": 1712077029.4647264}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 53, "power": 239.598}}, "t": 1712077029.9724667}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =1.7476789951324463, total / elapsed =1291.9992780647224 in_token_count =340 out_token_count =1918\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 1291.9992780647224, "units": "Tok/s", "t": 1712077032.180981}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 53, "power": 238.227}}, "t": 1712077030.4800365}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.99, "temperature": 53, "power": 237.169}}, "t": 1712077030.9893632}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 53, "power": 244.685}}, "t": 1712077031.4977155}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 53, "power": 242.639}}, "t": 1712077032.007775}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =7.349722146987915, total / elapsed =289.39869527879955 in_token_count =95 out_token_count =2032\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 289.39869527879955, "units": "Tok/s", "t": 1712077039.5307202}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 53, "power": 236.158}}, "t": 1712077032.5173497}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 54, "power": 234.351}}, "t": 1712077033.0257845}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 54, "power": 242.061}}, "t": 1712077033.5366666}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 54, "power": 244.591}}, "t": 1712077034.04434}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 54, "power": 247.438}}, "t": 1712077034.5543199}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 54, "power": 238.485}}, "t": 1712077035.0626655}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 54, "power": 238.937}}, "t": 1712077035.5749645}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 54, "power": 241.002}}, "t": 1712077036.0832703}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 54, "power": 242.234}}, "t": 1712077036.594024}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 55, "power": 242.633}}, "t": 1712077037.1023319}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 55, "power": 240.705}}, "t": 1712077037.6136827}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 55, "power": 239.681}}, "t": 1712077038.122026}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 242.024}}, "t": 1712077038.632818}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 236.571}}, "t": 1712077039.1409974}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =9.412617206573486, total / elapsed =220.02382063871423 in_token_count =5 out_token_count =2066\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 220.02382063871423, "units": "Tok/s", "t": 1712077048.943358}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.94, "temperature": 55, "power": 229.427}}, "t": 1712077039.6493304}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 238.342}}, "t": 1712077040.156812}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 236.089}}, "t": 1712077040.6686745}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 240.344}}, "t": 1712077041.1770523}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 55, "power": 236.799}}, "t": 1712077041.688002}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 55, "power": 237.261}}, "t": 1712077042.1962936}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 56, "power": 240.961}}, "t": 1712077042.704597}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 56, "power": 239.859}}, "t": 1712077043.2144172}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 56, "power": 246.777}}, "t": 1712077043.7232552}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 56, "power": 244.985}}, "t": 1712077044.2340555}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 56, "power": 235.963}}, "t": 1712077044.7415862}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 56, "power": 243.199}}, "t": 1712077045.2512512}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 56, "power": 242.245}}, "t": 1712077045.76066}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 56, "power": 244.207}}, "t": 1712077046.2732058}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 56, "power": 244.137}}, "t": 1712077046.781587}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 57, "power": 244.131}}, "t": 1712077047.29467}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 57, "power": 238.817}}, "t": 1712077047.8031044}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 57, "power": 236.317}}, "t": 1712077048.312418}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 57, "power": 247.031}}, "t": 1712077048.8201714}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =3.7340760231018066, total / elapsed =563.1915330564397 in_token_count =253 out_token_count =1850\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 563.1915330564397, "units": "Tok/s", "t": 1712077052.6774495}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.98, "temperature": 57, "power": 236.028}}, "t": 1712077049.3311498}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 57, "power": 241.04}}, "t": 1712077049.8398747}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 57, "power": 239.926}}, "t": 1712077050.3485436}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 57, "power": 246.374}}, "t": 1712077050.8572392}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 57, "power": 243.698}}, "t": 1712077051.3686786}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 57, "power": 239.484}}, "t": 1712077051.877047}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 58, "power": 238.033}}, "t": 1712077052.3909776}, "pipe": "data"}
+{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =3.099916696548462, total / elapsed =681.6312200752608 in_token_count =282 out_token_count =1831\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 681.6312200752608, "units": "Tok/s", "t": 1712077055.777384}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.94, "temperature": 60, "power": 305.004}}, "t": 1712077052.9005234}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 58, "power": 248.398}}, "t": 1712077053.4102662}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 58, "power": 241.597}}, "t": 1712077053.919587}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 58, "power": 248.901}}, "t": 1712077054.4286094}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 58, "power": 248.828}}, "t": 1712077054.9384027}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 58, "power": 246.998}}, "t": 1712077055.4479783}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =3.6785972118377686, total / elapsed =569.7824141373912 in_token_count =256 out_token_count =1840\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 569.7824141373912, "units": "Tok/s", "t": 1712077059.4559987}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 61, "power": 299.658}}, "t": 1712077055.955819}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 58, "power": 250.26}}, "t": 1712077056.4646406}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 58, "power": 250.794}}, "t": 1712077056.9743443}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 59, "power": 239.45}}, "t": 1712077057.48686}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 249.554}}, "t": 1712077057.9951653}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 247.034}}, "t": 1712077058.5079925}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 241.22}}, "t": 1712077059.0161667}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =9.39958119392395, total / elapsed =212.6690496904466 in_token_count =5 out_token_count =1994\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 212.6690496904466, "units": "Tok/s", "t": 1712077068.8556008}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 59, "power": 250.64}}, "t": 1712077059.526569}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.93, "temperature": 59, "power": 238.784}}, "t": 1712077060.034873}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 247.337}}, "t": 1712077060.5458949}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 59, "power": 243.86}}, "t": 1712077061.0536392}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 250.232}}, "t": 1712077061.564295}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 244.394}}, "t": 1712077062.0726852}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 59, "power": 251.124}}, "t": 1712077062.585282}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 239.521}}, "t": 1712077063.0929062}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 248.057}}, "t": 1712077063.6058283}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 60, "power": 248.686}}, "t": 1712077064.114153}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 60, "power": 250.861}}, "t": 1712077064.6254284}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 60, "power": 251.245}}, "t": 1712077065.1330726}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 60, "power": 249.063}}, "t": 1712077065.6438518}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 60, "power": 242.914}}, "t": 1712077066.152168}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 60, "power": 241.384}}, "t": 1712077066.660989}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 60, "power": 241.173}}, "t": 1712077067.1686792}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 60, "power": 252.494}}, "t": 1712077067.6824338}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 60, "power": 251.748}}, "t": 1712077068.1907816}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 60, "power": 250.953}}, "t": 1712077068.7032185}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =1.5333330631256104, total / elapsed =1351.9567599842333 in_token_count =349 out_token_count =1724\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 1351.9567599842333, "units": "Tok/s", "t": 1712077070.3889477}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 1.0, "temperature": 63, "power": 298.881}}, "t": 1712077069.2150197}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 61, "power": 256.021}}, "t": 1712077069.7245393}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 61, "power": 255.156}}, "t": 1712077070.2358518}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =2.981513738632202, total / elapsed =772.4264255969933 in_token_count =287 out_token_count =2016\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 772.4264255969933, "units": "Tok/s", "t": 1712077073.370476}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 1.0, "temperature": 63, "power": 297.8}}, "t": 1712077070.7478192}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 61, "power": 256.517}}, "t": 1712077071.2593775}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 61, "power": 255.708}}, "t": 1712077071.7707248}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 61, "power": 256.301}}, "t": 1712077072.2863202}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 61, "power": 251.569}}, "t": 1712077072.7944968}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 61, "power": 248.039}}, "t": 1712077073.3056884}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =9.337865114212036, total / elapsed =219.10789832314356 in_token_count =7 out_token_count =2039\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 219.10789832314356, "units": "Tok/s", "t": 1712077082.7083616}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 61, "power": 256.259}}, "t": 1712077073.8139553}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 61, "power": 254.062}}, "t": 1712077074.324509}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 61, "power": 245.754}}, "t": 1712077074.8325636}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.95, "temperature": 61, "power": 253.299}}, "t": 1712077075.3419423}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 247.681}}, "t": 1712077075.8507304}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 253.52}}, "t": 1712077076.362146}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 257.923}}, "t": 1712077076.8705842}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 246.558}}, "t": 1712077077.3817143}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 62, "power": 255.142}}, "t": 1712077077.8927941}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 256.727}}, "t": 1712077078.4028542}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 62, "power": 257.302}}, "t": 1712077078.9126284}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 62, "power": 256.593}}, "t": 1712077079.4223073}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 255.153}}, "t": 1712077079.9321141}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 253.693}}, "t": 1712077080.4409366}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 246.981}}, "t": 1712077080.948501}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 258.648}}, "t": 1712077081.4613638}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 63, "power": 257.42}}, "t": 1712077081.9697726}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 63, "power": 256.17}}, "t": 1712077082.48259}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =1.1987826824188232, total / elapsed =1825.1848580137982 in_token_count =363 out_token_count =1825\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 1825.1848580137982, "units": "Tok/s", "t": 1712077083.9071574}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 302.668}}, "t": 1712077082.9919574}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 257.015}}, "t": 1712077083.5002096}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =9.360902070999146, total / elapsed =216.43213278309116 in_token_count =7 out_token_count =2019\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 216.43213278309116, "units": "Tok/s", "t": 1712077093.2680798}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 63, "power": 228.772}}, "t": 1712077084.0085957}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 63, "power": 249.24}}, "t": 1712077084.519348}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 63, "power": 247.319}}, "t": 1712077085.027666}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 63, "power": 253.18}}, "t": 1712077085.5382407}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 63, "power": 252.613}}, "t": 1712077086.0458567}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 63, "power": 257.612}}, "t": 1712077086.557521}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 63, "power": 250.177}}, "t": 1712077087.0658932}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 63, "power": 251.754}}, "t": 1712077087.5790062}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 63, "power": 253.479}}, "t": 1712077088.0873787}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 258.49}}, "t": 1712077088.5971737}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 245.067}}, "t": 1712077089.1055946}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 247.452}}, "t": 1712077089.61578}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 255.314}}, "t": 1712077090.1241825}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 254.541}}, "t": 1712077090.634713}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 251.056}}, "t": 1712077091.1422677}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 254.212}}, "t": 1712077091.65464}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 261.404}}, "t": 1712077092.1630156}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 260.322}}, "t": 1712077092.675888}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 259.375}}, "t": 1712077093.187532}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =1.6552915573120117, total / elapsed =1223.3494401967162 in_token_count =344 out_token_count =1681\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 1223.3494401967162, "units": "Tok/s", "t": 1712077094.9233859}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 1.0, "temperature": 65, "power": 305.602}}, "t": 1712077093.6974325}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 258.964}}, "t": 1712077094.2057726}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 259.262}}, "t": 1712077094.7170568}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =6.735776901245117, total / elapsed =302.8603871400348 in_token_count =122 out_token_count =1918\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 302.8603871400348, "units": "Tok/s", "t": 1712077101.6591804}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 65, "power": 242.883}}, "t": 1712077095.2282736}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 260.724}}, "t": 1712077095.7365975}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 256.398}}, "t": 1712077096.2479167}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 253.015}}, "t": 1712077096.756201}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 259.385}}, "t": 1712077097.2644558}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 260.834}}, "t": 1712077097.7742095}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 261.829}}, "t": 1712077098.2867599}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 261.797}}, "t": 1712077098.7943554}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 257.263}}, "t": 1712077099.3072329}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 262.32}}, "t": 1712077099.8155463}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 261.33}}, "t": 1712077100.327143}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 259.641}}, "t": 1712077100.8346636}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 257.334}}, "t": 1712077101.3453913}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =9.376733779907227, total / elapsed =208.60142197823524 in_token_count =6 out_token_count =1950\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 208.60142197823524, "units": "Tok/s", "t": 1712077111.0359323}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 65, "power": 259.524}}, "t": 1712077101.8537173}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 255.343}}, "t": 1712077102.3643446}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 261.329}}, "t": 1712077102.8727148}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 261.487}}, "t": 1712077103.3838775}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 250.746}}, "t": 1712077103.8915277}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 258.779}}, "t": 1712077104.4042647}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 263.045}}, "t": 1712077104.9125872}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 256.57}}, "t": 1712077105.4216917}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 258.342}}, "t": 1712077105.9300387}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 258.881}}, "t": 1712077106.4407473}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 66, "power": 261.719}}, "t": 1712077106.9490478}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 261.701}}, "t": 1712077107.4596784}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 264.014}}, "t": 1712077107.9678414}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 262.896}}, "t": 1712077108.4795127}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 262.673}}, "t": 1712077108.9872966}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 260.643}}, "t": 1712077109.5003147}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 259.18}}, "t": 1712077110.0116837}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 264.342}}, "t": 1712077110.52004}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 264.338}}, "t": 1712077111.0284092}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =7.454042434692383, total / elapsed =262.1396399496938 in_token_count =91 out_token_count =1863\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 262.1396399496938, "units": "Tok/s", "t": 1712077118.4899912}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 67, "power": 259.961}}, "t": 1712077111.5362024}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 253.052}}, "t": 1712077112.0461195}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 255.804}}, "t": 1712077112.5547445}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 260.209}}, "t": 1712077113.063104}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 260.451}}, "t": 1712077113.575485}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 263.274}}, "t": 1712077114.0838072}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 256.366}}, "t": 1712077114.596054}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 259.88}}, "t": 1712077115.1043634}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 253.745}}, "t": 1712077115.6155698}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 255.264}}, "t": 1712077116.12388}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 258.281}}, "t": 1712077116.6325793}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 264.545}}, "t": 1712077117.1409705}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 264.976}}, "t": 1712077117.6516128}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 264.431}}, "t": 1712077118.1599905}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =5.846051216125488, total / elapsed =348.78243871276953 in_token_count =162 out_token_count =1877\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 348.78243871276953, "units": "Tok/s", "t": 1712077124.3360605}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 232.784}}, "t": 1712077118.7225287}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 265.613}}, "t": 1712077119.2333412}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 269.511}}, "t": 1712077119.7446232}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 268.822}}, "t": 1712077120.252583}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 270.094}}, "t": 1712077120.7638667}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 262.892}}, "t": 1712077121.274015}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 265.972}}, "t": 1712077121.783775}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 264.437}}, "t": 1712077122.2931328}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 263.491}}, "t": 1712077122.8026392}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 260.802}}, "t": 1712077123.3119967}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 269.439}}, "t": 1712077123.82158}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 268.243}}, "t": 1712077124.333879}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =5.276447296142578, total / elapsed =394.20463870085723 in_token_count =186 out_token_count =1894\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 394.20463870085723, "units": "Tok/s", "t": 1712077129.6125243}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 267.349}}, "t": 1712077124.8477187}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 267.877}}, "t": 1712077125.3589501}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 269.314}}, "t": 1712077125.8702157}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 269.52}}, "t": 1712077126.3799691}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 269.618}}, "t": 1712077126.8899188}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 267.782}}, "t": 1712077127.3988347}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 270.565}}, "t": 1712077127.9089699}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 270.95}}, "t": 1712077128.4172628}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 270.946}}, "t": 1712077128.927256}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 267.246}}, "t": 1712077129.4367208}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =6.864799976348877, total / elapsed =298.91621126175625 in_token_count =117 out_token_count =1935\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 298.91621126175625, "units": "Tok/s", "t": 1712077136.4773426}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 256.768}}, "t": 1712077129.9444873}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 269.566}}, "t": 1712077130.456895}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 259.187}}, "t": 1712077130.9652357}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 265.622}}, "t": 1712077131.478324}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 263.837}}, "t": 1712077131.9860687}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 265.017}}, "t": 1712077132.4980223}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 267.676}}, "t": 1712077133.006613}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 265.808}}, "t": 1712077133.5158017}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 268.92}}, "t": 1712077134.024262}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 269.13}}, "t": 1712077134.5348558}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 265.43}}, "t": 1712077135.043271}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 263.005}}, "t": 1712077135.5555665}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 275.159}}, "t": 1712077136.06371}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =9.365670204162598, total / elapsed =215.57453508267352 in_token_count =6 out_token_count =2013\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 215.57453508267352, "units": "Tok/s", "t": 1712077145.8430338}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 251.528}}, "t": 1712077136.5744483}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 266.814}}, "t": 1712077137.0818756}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 266.688}}, "t": 1712077137.5924528}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 270.304}}, "t": 1712077138.10077}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 265.715}}, "t": 1712077138.6116982}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 269.025}}, "t": 1712077139.1199605}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 70, "power": 272.951}}, "t": 1712077139.629561}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 262.893}}, "t": 1712077140.1379366}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 271.083}}, "t": 1712077140.646874}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 268.944}}, "t": 1712077141.1551952}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 260.215}}, "t": 1712077141.6674006}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 261.865}}, "t": 1712077142.175712}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 263.322}}, "t": 1712077142.6850648}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 268.791}}, "t": 1712077143.1927297}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 266.425}}, "t": 1712077143.7035}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 263.986}}, "t": 1712077144.2118268}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 265.806}}, "t": 1712077144.7222881}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 277.388}}, "t": 1712077145.2306936}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 271.722}}, "t": 1712077145.743145}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =7.434460878372192, total / elapsed =266.865349412452 in_token_count =91 out_token_count =1893\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 266.865349412452, "units": "Tok/s", "t": 1712077153.2775137}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 70, "power": 270.909}}, "t": 1712077146.250626}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 270.014}}, "t": 1712077146.762114}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 267.199}}, "t": 1712077147.2704577}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 271.914}}, "t": 1712077147.7796578}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 262.221}}, "t": 1712077148.288019}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 266.527}}, "t": 1712077148.7983856}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 270.045}}, "t": 1712077149.3082378}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 273.251}}, "t": 1712077149.8170698}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 274.223}}, "t": 1712077150.3264296}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 273.059}}, "t": 1712077150.8358314}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 273.754}}, "t": 1712077151.3464525}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 274.234}}, "t": 1712077151.8572943}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 271.586}}, "t": 1712077152.3686512}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 272.48}}, "t": 1712077152.8774369}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =9.292436361312866, total / elapsed =224.16080336822526 in_token_count =9 out_token_count =2074\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 224.16080336822526, "units": "Tok/s", "t": 1712077162.56997}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 219.784}}, "t": 1712077153.3882813}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 263.883}}, "t": 1712077153.8972597}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 272.381}}, "t": 1712077154.407827}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 271.207}}, "t": 1712077154.9160748}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 272.188}}, "t": 1712077155.4248896}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 268.489}}, "t": 1712077155.932514}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 273.768}}, "t": 1712077156.4451005}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 264.402}}, "t": 1712077156.9534082}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 71, "power": 262.537}}, "t": 1712077157.466297}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 266.913}}, "t": 1712077157.9776962}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 266.81}}, "t": 1712077158.4865434}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 271.504}}, "t": 1712077158.9940698}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 273.545}}, "t": 1712077159.503849}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 272.069}}, "t": 1712077160.0137076}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 271.299}}, "t": 1712077160.5224233}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 273.062}}, "t": 1712077161.030317}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 271.824}}, "t": 1712077161.5394044}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 270.817}}, "t": 1712077162.0477192}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 271.586}}, "t": 1712077162.560269}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =3.320425033569336, total / elapsed =578.5403918410394 in_token_count =273 out_token_count =1648\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 578.5403918410394, "units": "Tok/s", "t": 1712077165.8904104}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 1.0, "temperature": 71, "power": 267.79}}, "t": 1712077163.0762973}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 272.752}}, "t": 1712077163.5893717}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 261.754}}, "t": 1712077164.0977495}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 275.42}}, "t": 1712077164.608556}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 271.25}}, "t": 1712077165.116231}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 275.488}}, "t": 1712077165.627081}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =3.4148550033569336, total / elapsed =605.5894021758103 in_token_count =269 out_token_count =1799\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 605.5894021758103, "units": "Tok/s", "t": 1712077169.3052857}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 74, "power": 298.053}}, "t": 1712077166.1348414}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 275.506}}, "t": 1712077166.6453552}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 275.233}}, "t": 1712077167.1536598}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 275.216}}, "t": 1712077167.6672573}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 272.779}}, "t": 1712077168.17551}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 273.452}}, "t": 1712077168.6873498}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 269.087}}, "t": 1712077169.1958363}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =4.696740627288818, total / elapsed =392.39978237075167 in_token_count =213 out_token_count =1630\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 392.39978237075167, "units": "Tok/s", "t": 1712077174.0020456}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 247.569}}, "t": 1712077169.705012}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 273.458}}, "t": 1712077170.2134132}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 273.072}}, "t": 1712077170.7226486}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 271.216}}, "t": 1712077171.2305584}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 274.143}}, "t": 1712077171.74097}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 273.068}}, "t": 1712077172.2492068}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 273.826}}, "t": 1712077172.7587242}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 274.026}}, "t": 1712077173.2671452}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 272.955}}, "t": 1712077173.7810025}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =9.253382205963135, total / elapsed =221.75675383619566 in_token_count =11 out_token_count =2041\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 221.75675383619566, "units": "Tok/s", "t": 1712077183.2554452}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 71, "power": 271.295}}, "t": 1712077174.2938197}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 273.149}}, "t": 1712077174.8066704}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 274.239}}, "t": 1712077175.315022}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 270.742}}, "t": 1712077175.8268209}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 276.385}}, "t": 1712077176.3351636}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 268.205}}, "t": 1712077176.8443575}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 274.333}}, "t": 1712077177.3539078}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 265.142}}, "t": 1712077177.8624861}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 265.128}}, "t": 1712077178.371919}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 272.842}}, "t": 1712077178.881152}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 272.589}}, "t": 1712077179.3923628}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 274.207}}, "t": 1712077179.9036546}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 275.129}}, "t": 1712077180.4170952}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 274.267}}, "t": 1712077180.9254632}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 273.646}}, "t": 1712077181.4355805}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 269.257}}, "t": 1712077181.9442914}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 267.051}}, "t": 1712077182.4546003}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 275.895}}, "t": 1712077182.962811}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =6.158296823501587, total / elapsed =298.45914425328385 in_token_count =148 out_token_count =1690\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 298.45914425328385, "units": "Tok/s", "t": 1712077189.4137588}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 297.072}}, "t": 1712077183.4728684}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 272.207}}, "t": 1712077183.9815722}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 274.638}}, "t": 1712077184.4926016}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 276.804}}, "t": 1712077185.0004954}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 265.911}}, "t": 1712077185.5132625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 276.294}}, "t": 1712077186.0216515}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 268.428}}, "t": 1712077186.5326214}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 268.105}}, "t": 1712077187.0409584}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 266.831}}, "t": 1712077187.5517237}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 269.23}}, "t": 1712077188.0597699}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 276.589}}, "t": 1712077188.5687995}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 276.014}}, "t": 1712077189.076331}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =7.021287679672241, total / elapsed =291.54196400845314 in_token_count =110 out_token_count =1937\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 291.54196400845314, "units": "Tok/s", "t": 1712077196.4350672}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 74, "power": 309.98}}, "t": 1712077189.5855331}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 275.912}}, "t": 1712077190.0938396}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 268.552}}, "t": 1712077190.606417}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 271.333}}, "t": 1712077191.114859}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 272.974}}, "t": 1712077191.6246328}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 275.891}}, "t": 1712077192.1329181}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 263.091}}, "t": 1712077192.6435707}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 268.478}}, "t": 1712077193.1519084}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 271.11}}, "t": 1712077193.6617048}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 270.823}}, "t": 1712077194.169342}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 270.546}}, "t": 1712077194.6817408}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 271.186}}, "t": 1712077195.1900756}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 276.116}}, "t": 1712077195.703848}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 275.904}}, "t": 1712077196.2150166}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =9.361604690551758, total / elapsed =199.0043439753701 in_token_count =6 out_token_count =1857\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 199.0043439753701, "units": "Tok/s", "t": 1712077205.7966914}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 276.008}}, "t": 1712077196.724898}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 275.232}}, "t": 1712077197.234872}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 265.646}}, "t": 1712077197.7445264}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 275.411}}, "t": 1712077198.2540538}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 268.402}}, "t": 1712077198.7628279}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 274.941}}, "t": 1712077199.272262}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 262.252}}, "t": 1712077199.7817655}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 273.443}}, "t": 1712077200.290887}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 276.985}}, "t": 1712077200.802585}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 267.878}}, "t": 1712077201.3167758}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 266.035}}, "t": 1712077201.828062}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 266.841}}, "t": 1712077202.3356338}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 274.224}}, "t": 1712077202.8467946}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 273.772}}, "t": 1712077203.356591}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 270.949}}, "t": 1712077203.8661747}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 269.939}}, "t": 1712077204.3769643}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 265.174}}, "t": 1712077204.8853042}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 276.597}}, "t": 1712077205.396111}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =3.2115976810455322, total / elapsed =632.3955245038071 in_token_count =278 out_token_count =1753\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 632.3955245038071, "units": "Tok/s", "t": 1712077209.008307}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 321.446}}, "t": 1712077205.9039085}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 275.602}}, "t": 1712077206.4125881}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 280.078}}, "t": 1712077206.9216664}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 276.414}}, "t": 1712077207.4309158}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 269.521}}, "t": 1712077207.9402602}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 270.701}}, "t": 1712077208.4489868}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 280.802}}, "t": 1712077208.957864}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =9.364078044891357, total / elapsed =211.55312786833824 in_token_count =6 out_token_count =1975\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 211.55312786833824, "units": "Tok/s", "t": 1712077218.3724022}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 271.01}}, "t": 1712077209.4672763}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 277.486}}, "t": 1712077209.9773746}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 273.093}}, "t": 1712077210.490947}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 278.764}}, "t": 1712077210.998834}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 276.772}}, "t": 1712077211.5135221}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 281.273}}, "t": 1712077212.0246577}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 280.693}}, "t": 1712077212.5330274}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 277.257}}, "t": 1712077213.0422006}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 279.04}}, "t": 1712077213.5518956}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 280.208}}, "t": 1712077214.0613317}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 270.821}}, "t": 1712077214.5709658}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 273.59}}, "t": 1712077215.0801702}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 274.455}}, "t": 1712077215.589858}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 276.263}}, "t": 1712077216.0997112}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 270.718}}, "t": 1712077216.6120064}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 274.82}}, "t": 1712077217.1197004}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 276.088}}, "t": 1712077217.627298}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 280.515}}, "t": 1712077218.1350749}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =1.664726972579956, total / elapsed =1337.1562043895976 in_token_count =344 out_token_count =1882\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 1337.1562043895976, "units": "Tok/s", "t": 1712077220.0371437}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 75, "power": 286.919}}, "t": 1712077218.6444383}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 279.797}}, "t": 1712077219.1558807}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 276.814}}, "t": 1712077219.664499}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =7.145230531692505, total / elapsed =294.1822507582684 in_token_count =105 out_token_count =1997\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 294.1822507582684, "units": "Tok/s", "t": 1712077227.1823947}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 75, "power": 234.44}}, "t": 1712077220.175976}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 281.389}}, "t": 1712077220.6855218}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 268.454}}, "t": 1712077221.1933346}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 270.239}}, "t": 1712077221.705785}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 274.925}}, "t": 1712077222.2141185}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 279.692}}, "t": 1712077222.7255073}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 277.084}}, "t": 1712077223.2338326}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 277.75}}, "t": 1712077223.741903}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 278.637}}, "t": 1712077224.2501929}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 279.028}}, "t": 1712077224.760983}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 279.606}}, "t": 1712077225.269349}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 279.522}}, "t": 1712077225.7794144}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 276.219}}, "t": 1712077226.2871957}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 274.255}}, "t": 1712077226.7991114}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =9.142279148101807, total / elapsed =222.26405112798378 in_token_count =17 out_token_count =2015\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 222.26405112798378, "units": "Tok/s", "t": 1712077236.324694}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 74, "power": 272.351}}, "t": 1712077227.3074791}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 280.982}}, "t": 1712077227.8190534}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 73, "power": 277.424}}, "t": 1712077228.3266635}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 276.504}}, "t": 1712077228.8378255}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 73, "power": 274.974}}, "t": 1712077229.3460875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 269.452}}, "t": 1712077229.8551915}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 279.105}}, "t": 1712077230.364273}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 73, "power": 273.938}}, "t": 1712077230.8748405}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 280.208}}, "t": 1712077231.3843439}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 279.811}}, "t": 1712077231.8939593}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 282.078}}, "t": 1712077232.4064298}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 274.706}}, "t": 1712077232.9147422}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 270.508}}, "t": 1712077233.4275587}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 281.88}}, "t": 1712077233.9351544}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 282.93}}, "t": 1712077234.4445524}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 281.467}}, "t": 1712077234.952787}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 279.326}}, "t": 1712077235.4635131}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 279.088}}, "t": 1712077235.971866}, "pipe": "data"}
+{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"}
+{"event": "line", "data": "elapsed =4.6274635791778564, total / elapsed =474.3419288866617 in_token_count =216 out_token_count =1979\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "rate": 474.3419288866617, "units": "Tok/s", "t": 1712077240.952175}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 75, "power": 296.652}}, "t": 1712077236.4792805}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 281.954}}, "t": 1712077236.9895573}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 281.774}}, "t": 1712077237.4992352}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 284.565}}, "t": 1712077238.0082476}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 281.561}}, "t": 1712077238.5158758}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 282.147}}, "t": 1712077239.026364}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 280.004}}, "t": 1712077239.5361457}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 278.26}}, "t": 1712077240.0461094}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 278.118}}, "t": 1712077240.5559726}, "pipe": "data"}
+{"event": "end", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712077241.82962, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-1_3b-multinode.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-1_3b-multinode.data
new file mode 100644
index 000000000..3b45f0015
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-1_3b-multinode.data
@@ -0,0 +1 @@
+{"event": "message", "data": {"message": "Skip opt-1_3b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-1_3b.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-1_3b.data
new file mode 100644
index 000000000..8e5013d68
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-1_3b.data
@@ -0,0 +1 @@
+{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-6_7b-multinode.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-6_7b-multinode.data
new file mode 100644
index 000000000..cccd5c098
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-6_7b-multinode.data
@@ -0,0 +1 @@
+{"event": "message", "data": {"message": "Skip opt-6_7b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-6_7b.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-6_7b.data
new file mode 100644
index 000000000..8e5013d68
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-6_7b.data
@@ -0,0 +1 @@
+{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/reformer.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/reformer.D0.data
new file mode 100644
index 000000000..23619f512
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/reformer.D0.data
@@ -0,0 +1,291 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Reformer", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 1.0, "name": "reformer", "tag": ["reformer", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 106.069, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078112.970693, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712078112.9860363}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "line", "data": "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 5.877161979675293}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.84889030456543}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 73, "power": 286.263}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.813286781311035}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.790142059326172}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.774377822875977}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 74, "power": 250.749}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.76326847076416}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.755447864532471}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 42.38314549082414, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.75070333480835}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 74, "power": 302.534}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.842465612384196, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.745652198791504}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.63145888626222, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.739924907684326}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.68464590587847, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.735757827758789}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 0.98, "temperature": 75, "power": 319.986}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.85780509668206, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.7321648597717285}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.762408387688765, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.729575157165527}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.72479019546835, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.726365566253662}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 75, "power": 249.038}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.755593411803034, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.723445415496826}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.831244570885815, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.720085620880127}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 75, "power": 247.779}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.89020690222747, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.716990947723389}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.81798156705002, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.713686943054199}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.7296009861144, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.711978912353516}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 75, "power": 284.1}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.692559881338724, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.708196640014648}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.658595850231464, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.7056193351745605}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.57613715519568, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.702970504760742}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 75, "power": 257.581}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.590725761697435, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.701535224914551}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.65477032906796, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.698465347290039}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.62810092492661, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.6952409744262695}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 311.354}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.61688596614815, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.693437576293945}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.587803623145646, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.690184116363525}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.53808015152288, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.687097549438477}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 292.283}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.602905049185736, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.684821605682373}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.60110854953425, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.6812567710876465}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 257.582}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.540157914588704, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.678829669952393}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.54124119881017, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.675886631011963}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.586025382781784, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.67181396484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 286.584}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.530624329785496, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.6685662269592285}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.524061459611396, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.664484977722168}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.52944741216129, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.661380767822266}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 296.599}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.61860682635961, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.656714916229248}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.5812057995563, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.652444839477539}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.561579935613366, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.648081302642822}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 0.98, "temperature": 76, "power": 327.363}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.57395835085758, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.6426286697387695}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.642632169324386, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.636715888977051}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.56922587454374, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.631279468536377}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 276.736}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.49676511523986, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.624204158782959}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.504875316254754, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.617888450622559}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 294.356}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.5479032050464, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.609550952911377}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.597072283192254, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.600521087646484}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.50102088837983, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.592146873474121}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 310.488}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.502168636235126, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.585984706878662}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.569347800147355, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.573252201080322}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.57391487109742, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.566571235656738}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 319.918}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.57587548502977, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.552077770233154}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.58684800758302, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.538307189941406}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.6049095948316, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.525749206542969}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 282.145}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.59864445674683, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.51253080368042}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.55511203093286, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.498420238494873}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.47464814084089, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.486696243286133}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 322.228}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.55849644232166, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.4735894203186035}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.55699371971329, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.468693733215332}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 287.737}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.54037085673302, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.46619176864624}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.537931904178045, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.457893371582031}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.494300638209424, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.445857048034668}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 311.649}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.48301397106765, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.424015998840332}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.576752056001375, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.412559986114502}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.56804997370876, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.405117034912109}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 77, "power": 323.327}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.52122714709638, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.394566535949707}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.494944285711945, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.387679100036621}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.570233429364485, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.370873928070068}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 0.98, "temperature": 76, "power": 285.293}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 55.589726686012376, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 301.482}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712078188.512789, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/regnet_y_128gf.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/regnet_y_128gf.D0.data
new file mode 100644
index 000000000..21d55738c
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/regnet_y_128gf.D0.data
@@ -0,0 +1,404 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "regnet_y_128gf", "--batch-size": 64}, "tags": ["classification", "convnet", "lstm", "resnet", "vision"], "weight": 2.0, "name": "regnet_y_128gf", "tag": ["regnet_y_128gf", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 71, "power": 101.054, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077741.768871, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712077741.785265}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 66, "power": 60.471}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3952.375, 81920.0], "load": 0, "temperature": 66, "power": 93.463}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03619384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00616455078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94366455078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92230224609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.94, "temperature": 71, "power": 295.037}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02435302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.16741943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96478271484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.15771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 72, "power": 301.568}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.017822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 84.98551842782294, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.13763427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05963134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.44691104484649, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03607177734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 73, "power": 323.234}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97662353515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.56104566371464, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.35614013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.4664306640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.64288594764278, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.239990234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 73, "power": 294.725}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06005859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.78456530737698, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.17510986328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.5086669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 76.10062097113837, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.4044189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9764404296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 74, "power": 218.053}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 76.45869703157555, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06768798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.12451171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 76.32774359566, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07763671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01544189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 83.45148977677785, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 74, "power": 301.636}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04644775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.14105224609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.72912826553359, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.45257568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 66.62103209244626, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.43798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08331298828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 75, "power": 318.633}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.82531964013918, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04290771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01446533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 76.34880028862328, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9464111328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.25390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 76.08548985374642, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 75, "power": 286.569}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.16436767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.26544189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.19990384564295, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03173828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1256103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 85.53026260049819, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.007080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 76, "power": 337.914}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.10318274591698, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.047119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1983642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.73469147345757, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.35552978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92474365234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 76.43881534569796, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 76, "power": 307.61}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.2254638671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.22601318359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.7413024948707, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05743408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06365966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.12117892741466, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.277587890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 76, "power": 305.139}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.32281494140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 89.79228287366135, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.150390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 81.94335015751093, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.38861083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0684814453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.56454854232919, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 76, "power": 288.174}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9925537109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0540771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 76.23063421632908, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.20361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.15069580078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.74113021364218, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.054931640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 76, "power": 299.898}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.018310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 76.48645317773433, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.141845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9903564453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.13518332940372, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00531005859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.1342006002105, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86383056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 77, "power": 307.189}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1290283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.64287334170248, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7943115234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.95451562098192, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00726318359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 76, "power": 306.915}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90667724609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 65.00787395176391, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90716552734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01605224609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 76.05649538510784, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.853515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.947509765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.97, "temperature": 76, "power": 314.279}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.83291793384016, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 80.42923125283956, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0294189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84466552734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 86.02788063200617, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.95, "temperature": 77, "power": 295.41}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89312744140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 74.86631084853728, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9566650390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.14141845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.52137661800205, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.14886474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 77, "power": 295.016}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1663818359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 76.01757209606161, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.2611083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05230712890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.47794871984293, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.191650390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1234130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 78.74383658456956, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 77, "power": 296.705}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02764892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.987060546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 88.09311072363006, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0841064453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 74.8300009407885, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92816162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04681396484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 77, "power": 298.61}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.50946321846355, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9100341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96356201171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.86461140283208, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.5179443359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.43861316609807, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 77, "power": 306.71}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.2796630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.15191650390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.85908370621165, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99945068359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.021728515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.8063557629032, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10284423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 78, "power": 326.798}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 74.55281723342608, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02862548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8790283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.26970529071299, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.13043212890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05841064453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.62476504552366, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 78, "power": 317.391}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0330810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97674560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.38286047887179, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.036376953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.22381591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 80.08832455780012, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 78, "power": 330.027}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04339599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 86.02331163464297, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.39404296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 74.51878076804775, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.2197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99554443359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.37728952473498, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 78, "power": 299.789}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.09002685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.77026642514522, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0643310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 75.38854037479985, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 78, "power": 304.065}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.2716064453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 78.39624291395576, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99871826171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9783935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 88.24513712055776, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92926025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 74.56261801281862, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 78, "power": 310.599}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 78, "power": 310.599}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712077833.3021524, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0.data
new file mode 100644
index 000000000..aa6e9b4c9
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0.data
@@ -0,0 +1,304 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "multigpu", "resnet", "vision"], "weight": 5.0, "name": "resnet152-multi", "tag": ["resnet152-multi", "0"], "job-number": 0, "devices": ["0"]}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 70, "power": 99.706, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078410.803939, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712078410.8207672}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 6.928679466247559}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23996.375, 81920.0], "load": 1.0, "temperature": 70, "power": 284.718}}}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 0/16 ( 0%)] Loss: 6.929 (6.93) Time: 3.440s, 74.42/s (3.440s, 74.42/s) LR: 1.000e-05 Data: 1.245 (1.245)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 6.93458366394043}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.958017349243164}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27176.375, 81920.0], "load": 1.0, "temperature": 71, "power": 312.674}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.951747417449951}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938842296600342}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27176.375, 81920.0], "load": 0.94, "temperature": 72, "power": 322.148}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.954232215881348}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 15/16 (100%)] Loss: 6.934 (6.94) Time: 0.362s, 707.33/s (0.584s, 438.65/s) LR: 1.000e-05 Data: 0.000 (0.094)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/16] Time: 1.135 (1.135) Loss: 6.9339 (6.9339) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.256 (0.255) Loss: 6.8979 (6.9128) Acc@1: 0.0000 ( 0.1453) Acc@5: 28.1250 ( 0.5329)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0/20240402-172015-resnet152-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 707.0748652460082, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27660.375, 81920.0], "load": 0.5, "temperature": 68, "power": 94.45}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4266.375, 81920.0], "load": 1.0, "temperature": 68, "power": 96.145}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.963781833648682}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 0/16 ( 0%)] Loss: 6.964 (6.96) Time: 1.407s, 182.00/s (1.407s, 182.00/s) LR: 2.001e-02 Data: 1.042 (1.042)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 6.906595230102539}, "pipe": "data"}
+{"event": "data", "data": {"rate": 679.20529280488, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27194.375, 81920.0], "load": 0.99, "temperature": 72, "power": 321.097}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 638.6815951304361, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.927788257598877}, "pipe": "data"}
+{"event": "data", "data": {"rate": 622.7818045761736, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.902334213256836}, "pipe": "data"}
+{"event": "data", "data": {"rate": 625.4338815218415, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.895504951477051}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27194.375, 81920.0], "load": 0.99, "temperature": 72, "power": 325.818}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 625.0208675993767, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.976802825927734}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 15/16 (100%)] Loss: 6.977 (6.92) Time: 0.365s, 701.13/s (0.441s, 579.85/s) LR: 2.001e-02 Data: 0.000 (0.076)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/16] Time: 0.979 (0.979) Loss: 6.8470 (6.8470) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.229) Loss: 6.5615 (6.8342) Acc@1: 18.7500 ( 0.2422) Acc@5: 28.1250 ( 1.2355)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0/20240402-172015-resnet152-224/checkpoint-1.pth.tar', 0.24224806201550386)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 700.26152839631, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27678.375, 81920.0], "load": 0, "temperature": 68, "power": 94.104}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27678.375, 81920.0], "load": 0.03, "temperature": 68, "power": 94.158}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.833113193511963}, "pipe": "data"}
+{"event": "line", "data": "Train: 2 [ 0/16 ( 0%)] Loss: 6.833 (6.83) Time: 1.402s, 182.66/s (1.402s, 182.66/s) LR: 4.001e-02 Data: 1.036 (1.036)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 6.856740474700928}, "pipe": "data"}
+{"event": "data", "data": {"rate": 686.0018190892456, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28162.375, 81920.0], "load": 0.99, "temperature": 71, "power": 256.65}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 618.474316824814, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.901230335235596}, "pipe": "data"}
+{"event": "data", "data": {"rate": 620.2816104679656, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94493293762207}, "pipe": "data"}
+{"event": "data", "data": {"rate": 622.0104375311398, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9797515869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28162.375, 81920.0], "load": 0.99, "temperature": 72, "power": 315.93}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 621.2230583436107, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.940389156341553}, "pipe": "data"}
+{"event": "line", "data": "Train: 2 [ 15/16 (100%)] Loss: 6.940 (6.92) Time: 0.366s, 698.50/s (0.442s, 579.01/s) LR: 4.001e-02 Data: 0.000 (0.076)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/16] Time: 0.983 (0.983) Loss: 6.8053 (6.8053) Acc@1: 0.7812 ( 0.7812) Acc@5: 3.1250 ( 3.1250)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.234) Loss: 6.3810 (6.8034) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 1.1386)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 699.0112668574657, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28646.375, 81920.0], "load": 0.55, "temperature": 71, "power": 301.018}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28646.375, 81920.0], "load": 0, "temperature": 68, "power": 94.158}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.861546039581299}, "pipe": "data"}
+{"event": "line", "data": "Train: 3 [ 0/16 ( 0%)] Loss: 6.862 (6.86) Time: 1.419s, 180.38/s (1.419s, 180.38/s) LR: 6.000e-02 Data: 1.054 (1.054)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 574.9147911994536, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.910906791687012}, "pipe": "data"}
+{"event": "data", "data": {"rate": 615.3708341350615, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.954673767089844}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29130.375, 81920.0], "load": 0.92, "temperature": 72, "power": 296.694}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 619.048402262069, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.991860389709473}, "pipe": "data"}
+{"event": "data", "data": {"rate": 618.8276175417132, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.054144382476807}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29130.375, 81920.0], "load": 0.94, "temperature": 73, "power": 261.696}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 622.4347028431811, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03437614440918}, "pipe": "data"}
+{"event": "line", "data": "Train: 3 [ 15/16 (100%)] Loss: 7.034 (6.96) Time: 0.366s, 699.12/s (0.444s, 576.57/s) LR: 6.000e-02 Data: 0.000 (0.077)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/16] Time: 0.969 (0.969) Loss: 6.9324 (6.9324) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.227) Loss: 6.3954 (6.8244) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.1143)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0/20240402-172015-resnet152-224/checkpoint-3.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 698.2562199211644, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29614.375, 81920.0], "load": 0.9, "temperature": 71, "power": 92.878}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29614.375, 81920.0], "load": 0, "temperature": 68, "power": 93.866}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.864370346069336}, "pipe": "data"}
+{"event": "line", "data": "Train: 4 [ 0/16 ( 0%)] Loss: 6.864 (6.86) Time: 1.374s, 186.26/s (1.374s, 186.26/s) LR: 8.000e-02 Data: 1.008 (1.008)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 633.9749328466077, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.925772666931152}, "pipe": "data"}
+{"event": "data", "data": {"rate": 616.5410220554338, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.979971885681152}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30098.375, 81920.0], "load": 0.94, "temperature": 73, "power": 309.73}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 616.7090113287061, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.047486305236816}, "pipe": "data"}
+{"event": "data", "data": {"rate": 621.240783444034, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0692853927612305}, "pipe": "data"}
+{"event": "data", "data": {"rate": 621.1234848272869, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03903341293335}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30098.375, 81920.0], "load": 0.99, "temperature": 73, "power": 304.283}}}, "pipe": "data"}
+{"event": "line", "data": "Train: 4 [ 15/16 (100%)] Loss: 7.039 (7.00) Time: 0.368s, 695.92/s (0.442s, 579.76/s) LR: 8.000e-02 Data: 0.001 (0.075)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/16] Time: 0.968 (0.968) Loss: 6.8013 (6.8013) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.232) Loss: 6.5208 (6.8458) Acc@1: 0.0000 ( 0.1938) Acc@5: 0.0000 ( 1.2597)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 695.1499309214444, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30582.375, 81920.0], "load": 0.49, "temperature": 72, "power": 322.828}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30582.375, 81920.0], "load": 0, "temperature": 68, "power": 93.963}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.870933532714844}, "pipe": "data"}
+{"event": "line", "data": "Train: 5 [ 0/16 ( 0%)] Loss: 6.871 (6.87) Time: 1.461s, 175.25/s (1.461s, 175.25/s) LR: 9.993e-02 Data: 1.095 (1.095)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 617.4417904279894, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.974578857421875}, "pipe": "data"}
+{"event": "data", "data": {"rate": 682.7613338014693, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0785064697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31066.375, 81920.0], "load": 0.97, "temperature": 73, "power": 321.738}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 696.4129160798198, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1159820556640625}, "pipe": "data"}
+{"event": "data", "data": {"rate": 594.1094227114396, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.087150573730469}, "pipe": "data"}
+{"event": "data", "data": {"rate": 618.0559025190126, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.095391273498535}, "pipe": "data"}
+{"event": "line", "data": "Train: 5 [ 15/16 (100%)] Loss: 7.095 (7.04) Time: 0.367s, 698.06/s (0.447s, 573.04/s) LR: 9.993e-02 Data: 0.000 (0.080)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31066.375, 81920.0], "load": 1.0, "temperature": 74, "power": 307.973}}}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/16] Time: 0.952 (0.952) Loss: 6.9660 (6.9660) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.227) Loss: 6.3292 (6.8915) Acc@1: 0.0000 ( 0.2180) Acc@5: 0.0000 ( 0.9690)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 697.274328954285, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31550.375, 81920.0], "load": 0.95, "temperature": 73, "power": 263.384}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.892255783081055}, "pipe": "data"}
+{"event": "line", "data": "Train: 6 [ 0/16 ( 0%)] Loss: 6.892 (6.89) Time: 1.440s, 177.72/s (1.440s, 177.72/s) LR: 9.990e-02 Data: 1.074 (1.074)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31886.375, 81920.0], "load": 0.78, "temperature": 71, "power": 305.17}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 533.4449433516061, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.007454872131348}, "pipe": "data"}
+{"event": "data", "data": {"rate": 667.5955354506842, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.977649688720703}, "pipe": "data"}
+{"event": "data", "data": {"rate": 648.5725892702532, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32034.375, 81920.0], "load": 0.99, "temperature": 73, "power": 308.155}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.015753746032715}, "pipe": "data"}
+{"event": "data", "data": {"rate": 695.525135994952, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.076228141784668}, "pipe": "data"}
+{"event": "data", "data": {"rate": 595.2128546036964, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.142056465148926}, "pipe": "data"}
+{"event": "line", "data": "Train: 6 [ 15/16 (100%)] Loss: 7.142 (7.03) Time: 0.368s, 695.64/s (0.446s, 574.31/s) LR: 9.990e-02 Data: 0.000 (0.078)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 623.5005476530862, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/16] Time: 1.005 (1.005) Loss: 6.8032 (6.8032) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.234) Loss: 6.4867 (6.8581) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.2112)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32034.375, 81920.0], "load": 0.99, "temperature": 71, "power": 96.91}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 695.5290766066321, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32518.375, 81920.0], "load": 0.9, "temperature": 70, "power": 296.207}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.864059925079346}, "pipe": "data"}
+{"event": "line", "data": "Train: 7 [ 0/16 ( 0%)] Loss: 6.864 (6.86) Time: 1.468s, 174.39/s (1.468s, 174.39/s) LR: 9.987e-02 Data: 1.101 (1.101)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32854.375, 81920.0], "load": 0.99, "temperature": 72, "power": 294.527}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.928898811340332}, "pipe": "data"}
+{"event": "data", "data": {"rate": 616.3868346950447, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.994691848754883}, "pipe": "data"}
+{"event": "data", "data": {"rate": 617.5208853443247, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33002.375, 81920.0], "load": 0.99, "temperature": 73, "power": 301.93}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.032052516937256}, "pipe": "data"}
+{"event": "data", "data": {"rate": 616.4052911726154, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0799455642700195}, "pipe": "data"}
+{"event": "data", "data": {"rate": 616.508541577737, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.042294025421143}, "pipe": "data"}
+{"event": "line", "data": "Train: 7 [ 15/16 (100%)] Loss: 7.042 (6.99) Time: 0.368s, 695.99/s (0.448s, 571.67/s) LR: 9.987e-02 Data: 0.000 (0.080)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 626.9884529569433, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/16] Time: 0.957 (0.957) Loss: 6.8256 (6.8256) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.226) Loss: 6.5559 (6.8334) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 0.9448)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33002.375, 81920.0], "load": 0.48, "temperature": 70, "power": 96.731}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 695.8750108665911, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33486.375, 81920.0], "load": 0, "temperature": 72, "power": 324.307}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.889812469482422}, "pipe": "data"}
+{"event": "line", "data": "Train: 8 [ 0/16 ( 0%)] Loss: 6.890 (6.89) Time: 1.404s, 182.30/s (1.404s, 182.30/s) LR: 9.982e-02 Data: 1.038 (1.038)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 637.3029577815294, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33970.375, 81920.0], "load": 0.94, "temperature": 73, "power": 289.482}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.915931701660156}, "pipe": "data"}
+{"event": "data", "data": {"rate": 696.7677306062536, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.943663597106934}, "pipe": "data"}
+{"event": "data", "data": {"rate": 594.5455805506919, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.988678932189941}, "pipe": "data"}
+{"event": "data", "data": {"rate": 615.5034218475955, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33970.375, 81920.0], "load": 0.94, "temperature": 73, "power": 223.147}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94257116317749}, "pipe": "data"}
+{"event": "data", "data": {"rate": 618.5969795861934, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.018918037414551}, "pipe": "data"}
+{"event": "line", "data": "Train: 8 [ 15/16 (100%)] Loss: 7.019 (6.94) Time: 0.369s, 692.84/s (0.444s, 576.78/s) LR: 9.982e-02 Data: 0.000 (0.076)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 623.3629640368691, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/16] Time: 0.946 (0.946) Loss: 6.8033 (6.8033) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.225) Loss: 6.6473 (6.8228) Acc@1: 0.0000 ( 0.2907) Acc@5: 0.0000 ( 1.0174)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0/20240402-172015-resnet152-224/checkpoint-8.pth.tar', 0.29069767441860467)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33970.375, 81920.0], "load": 0, "temperature": 70, "power": 96.035}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 692.5510732339538, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34454.375, 81920.0], "load": 0.94, "temperature": 73, "power": 305.064}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.901410102844238}, "pipe": "data"}
+{"event": "line", "data": "Train: 9 [ 0/16 ( 0%)] Loss: 6.901 (6.90) Time: 1.474s, 173.70/s (1.474s, 173.70/s) LR: 9.978e-02 Data: 1.106 (1.106)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34938.375, 81920.0], "load": 0.93, "temperature": 73, "power": 304.756}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.856259822845459}, "pipe": "data"}
+{"event": "data", "data": {"rate": 618.4951658429918, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.910282135009766}, "pipe": "data"}
+{"event": "data", "data": {"rate": 619.7440089592776, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.907938003540039}, "pipe": "data"}
+{"event": "data", "data": {"rate": 619.0867586660061, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34938.375, 81920.0], "load": 0.93, "temperature": 74, "power": 305.63}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.029355049133301}, "pipe": "data"}
+{"event": "data", "data": {"rate": 620.5607649355844, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.980498313903809}, "pipe": "data"}
+{"event": "line", "data": "Train: 9 [ 15/16 (100%)] Loss: 6.980 (6.92) Time: 0.368s, 696.03/s (0.448s, 571.99/s) LR: 9.978e-02 Data: 0.000 (0.081)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 624.7825707842533, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/16] Time: 0.963 (0.963) Loss: 6.7888 (6.7888) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.227) Loss: 6.4438 (6.8076) Acc@1: 0.0000 ( 0.3391) Acc@5: 3.1250 ( 1.2355)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0/20240402-172015-resnet152-224/checkpoint-9.pth.tar', 0.3391472868217054)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 571.2661482212146, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35422.375, 81920.0], "load": 0, "temperature": 73, "power": 247.331}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35422.375, 81920.0], "load": 0.55, "temperature": 70, "power": 97.133}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.832240104675293}, "pipe": "data"}
+{"event": "line", "data": "Train: 10 [ 0/16 ( 0%)] Loss: 6.832 (6.83) Time: 1.394s, 183.65/s (1.394s, 183.65/s) LR: 9.973e-02 Data: 1.027 (1.027)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 665.8569343484285, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.885162830352783}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35906.375, 81920.0], "load": 0.92, "temperature": 73, "power": 300.585}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 696.3805943225001, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.871856689453125}, "pipe": "data"}
+{"event": "data", "data": {"rate": 593.0976791067272, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.896815299987793}, "pipe": "data"}
+{"event": "data", "data": {"rate": 615.5346879696415, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "stop", "data": null, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712078541.038392, "return_code": -15}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet152.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet152.D0.data
new file mode 100644
index 000000000..a96f18118
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet152.D0.data
@@ -0,0 +1,305 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet152", "tag": ["resnet152", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 105.551, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078276.538776, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712078276.5554135}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 6.928679466247559}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24052.375, 81920.0], "load": 1.0, "temperature": 72, "power": 312.946}}}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 0/16 ( 0%)] Loss: 6.929 (6.93) Time: 3.501s, 73.12/s (3.501s, 73.12/s) LR: 1.000e-05 Data: 1.316 (1.316)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 6.93458366394043}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.958017349243164}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 287.845}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.951747417449951}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938842296600342}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27182.375, 81920.0], "load": 0.94, "temperature": 73, "power": 318.408}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.954232215881348}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 15/16 (100%)] Loss: 6.934 (6.94) Time: 0.362s, 707.91/s (0.587s, 436.42/s) LR: 1.000e-05 Data: 0.000 (0.098)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/16] Time: 1.098 (1.098) Loss: 6.9339 (6.9339) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.240 (0.249) Loss: 6.8979 (6.9128) Acc@1: 0.0000 ( 0.1453) Acc@5: 28.1250 ( 0.5329)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152.D0/20240402-171801-resnet152-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 707.9541880300716, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27666.375, 81920.0], "load": 0, "temperature": 70, "power": 97.036}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4978.375, 81920.0], "load": 1.0, "temperature": 69, "power": 98.775}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.963781833648682}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 0/16 ( 0%)] Loss: 6.964 (6.96) Time: 1.363s, 187.88/s (1.363s, 187.88/s) LR: 2.001e-02 Data: 0.997 (0.997)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 6.906595230102539}, "pipe": "data"}
+{"event": "data", "data": {"rate": 623.9048738088012, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27244.375, 81920.0], "load": 0.91, "temperature": 73, "power": 301.233}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.927788257598877}, "pipe": "data"}
+{"event": "data", "data": {"rate": 623.9586897230192, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.902334213256836}, "pipe": "data"}
+{"event": "data", "data": {"rate": 621.1864389862641, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.895504951477051}, "pipe": "data"}
+{"event": "data", "data": {"rate": 622.9994235211116, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27244.375, 81920.0], "load": 0.92, "temperature": 74, "power": 279.396}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.976802825927734}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 15/16 (100%)] Loss: 6.977 (6.92) Time: 0.366s, 698.58/s (0.439s, 582.81/s) LR: 2.001e-02 Data: 0.000 (0.074)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 630.6763576998, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/16] Time: 1.006 (1.006) Loss: 6.8470 (6.8470) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.232) Loss: 6.5615 (6.8342) Acc@1: 18.7500 ( 0.2422) Acc@5: 28.1250 ( 1.2355)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152.D0/20240402-171801-resnet152-224/checkpoint-1.pth.tar', 0.24224806201550386)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 698.4095060299613, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27730.375, 81920.0], "load": 0.84, "temperature": 73, "power": 278.193}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27730.375, 81920.0], "load": 0.02, "temperature": 69, "power": 95.645}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.833113193511963}, "pipe": "data"}
+{"event": "line", "data": "Train: 2 [ 0/16 ( 0%)] Loss: 6.833 (6.83) Time: 1.391s, 184.08/s (1.391s, 184.08/s) LR: 4.001e-02 Data: 1.025 (1.025)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 6.856740474700928}, "pipe": "data"}
+{"event": "data", "data": {"rate": 621.7290187676313, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28214.375, 81920.0], "load": 0.93, "temperature": 74, "power": 303.545}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.901230335235596}, "pipe": "data"}
+{"event": "data", "data": {"rate": 622.2212650529127, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94493293762207}, "pipe": "data"}
+{"event": "data", "data": {"rate": 619.3351932974532, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9797515869140625}, "pipe": "data"}
+{"event": "data", "data": {"rate": 620.65949349532, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28214.375, 81920.0], "load": 0.93, "temperature": 74, "power": 301.451}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.940389156341553}, "pipe": "data"}
+{"event": "line", "data": "Train: 2 [ 15/16 (100%)] Loss: 6.940 (6.92) Time: 0.367s, 696.73/s (0.442s, 579.46/s) LR: 4.001e-02 Data: 0.000 (0.075)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 626.2283912249218, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/16] Time: 0.960 (0.960) Loss: 6.8053 (6.8053) Acc@1: 0.7812 ( 0.7812) Acc@5: 3.1250 ( 3.1250)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.228) Loss: 6.3810 (6.8034) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 1.1386)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 696.7547428530818, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28698.375, 81920.0], "load": 0.91, "temperature": 71, "power": 112.35}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28698.375, 81920.0], "load": 0, "temperature": 69, "power": 95.158}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.861546039581299}, "pipe": "data"}
+{"event": "line", "data": "Train: 3 [ 0/16 ( 0%)] Loss: 6.862 (6.86) Time: 1.363s, 187.80/s (1.363s, 187.80/s) LR: 6.000e-02 Data: 0.997 (0.997)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 538.5215973739532, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.910906791687012}, "pipe": "data"}
+{"event": "data", "data": {"rate": 697.3763586312554, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.954673767089844}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29182.375, 81920.0], "load": 0.96, "temperature": 73, "power": 254.949}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 683.0387515712258, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.991860389709473}, "pipe": "data"}
+{"event": "data", "data": {"rate": 615.5464197513131, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.054144382476807}, "pipe": "data"}
+{"event": "data", "data": {"rate": 619.0368784046575, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29182.375, 81920.0], "load": 0.92, "temperature": 74, "power": 302.369}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03437614440918}, "pipe": "data"}
+{"event": "line", "data": "Train: 3 [ 15/16 (100%)] Loss: 7.034 (6.96) Time: 0.368s, 696.13/s (0.441s, 581.11/s) LR: 6.000e-02 Data: 0.000 (0.074)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 626.3523568231118, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/16] Time: 1.004 (1.004) Loss: 6.9324 (6.9324) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.235) Loss: 6.3954 (6.8244) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.1143)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152.D0/20240402-171801-resnet152-224/checkpoint-3.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 696.1512481479284, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29666.375, 81920.0], "load": 0, "temperature": 73, "power": 336.232}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29666.375, 81920.0], "load": 0, "temperature": 69, "power": 94.664}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.864370346069336}, "pipe": "data"}
+{"event": "line", "data": "Train: 4 [ 0/16 ( 0%)] Loss: 6.864 (6.86) Time: 1.412s, 181.26/s (1.412s, 181.26/s) LR: 8.000e-02 Data: 1.045 (1.045)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 6.925772666931152}, "pipe": "data"}
+{"event": "data", "data": {"rate": 685.7684773436212, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 696.4598174842167, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.979971885681152}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30150.375, 81920.0], "load": 0.99, "temperature": 72, "power": 318.327}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 593.343613150496, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.047486305236816}, "pipe": "data"}
+{"event": "data", "data": {"rate": 619.3405544189388, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0692853927612305}, "pipe": "data"}
+{"event": "data", "data": {"rate": 618.3821764078899, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03903341293335}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30150.375, 81920.0], "load": 0.99, "temperature": 74, "power": 260.613}}}, "pipe": "data"}
+{"event": "line", "data": "Train: 4 [ 15/16 (100%)] Loss: 7.039 (7.00) Time: 0.367s, 697.38/s (0.444s, 576.54/s) LR: 8.000e-02 Data: 0.001 (0.077)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/16] Time: 0.978 (0.978) Loss: 6.8013 (6.8013) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.228) Loss: 6.5208 (6.8458) Acc@1: 0.0000 ( 0.1938) Acc@5: 0.0000 ( 1.2597)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 695.8855858517969, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30634.375, 81920.0], "load": 0.56, "temperature": 73, "power": 314.793}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.870933532714844}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30970.375, 81920.0], "load": 0.01, "temperature": 69, "power": 253.158}}}, "pipe": "data"}
+{"event": "line", "data": "Train: 5 [ 0/16 ( 0%)] Loss: 6.871 (6.87) Time: 1.413s, 181.18/s (1.413s, 181.18/s) LR: 9.993e-02 Data: 1.046 (1.046)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 696.742391472384, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.974578857421875}, "pipe": "data"}
+{"event": "data", "data": {"rate": 595.0913965193677, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0785064697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31118.375, 81920.0], "load": 1.0, "temperature": 73, "power": 179.321}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 617.5611905017515, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1159820556640625}, "pipe": "data"}
+{"event": "data", "data": {"rate": 617.7851915182639, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.087150573730469}, "pipe": "data"}
+{"event": "data", "data": {"rate": 619.0399266090767, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.095391273498535}, "pipe": "data"}
+{"event": "line", "data": "Train: 5 [ 15/16 (100%)] Loss: 7.095 (7.04) Time: 0.368s, 696.15/s (0.444s, 576.61/s) LR: 9.993e-02 Data: 0.001 (0.077)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/16] Time: 1.010 (1.010) Loss: 6.9660 (6.9660) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.236) Loss: 6.3292 (6.8915) Acc@1: 0.0000 ( 0.2180) Acc@5: 0.0000 ( 0.9690)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31118.375, 81920.0], "load": 1.0, "temperature": 72, "power": 297.346}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 695.0092799417304, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31602.375, 81920.0], "load": 0.94, "temperature": 73, "power": 305.34}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.892255783081055}, "pipe": "data"}
+{"event": "line", "data": "Train: 6 [ 0/16 ( 0%)] Loss: 6.892 (6.89) Time: 1.391s, 184.03/s (1.391s, 184.03/s) LR: 9.990e-02 Data: 1.025 (1.025)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31938.375, 81920.0], "load": 0.95, "temperature": 72, "power": 305.382}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 602.8188939411352, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.007454872131348}, "pipe": "data"}
+{"event": "data", "data": {"rate": 657.6032740875111, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.977649688720703}, "pipe": "data"}
+{"event": "data", "data": {"rate": 655.8522315460139, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32086.375, 81920.0], "load": 1.0, "temperature": 74, "power": 305.829}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.015753746032715}, "pipe": "data"}
+{"event": "data", "data": {"rate": 696.5809807330335, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.076228141784668}, "pipe": "data"}
+{"event": "data", "data": {"rate": 595.7863043876953, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.142056465148926}, "pipe": "data"}
+{"event": "line", "data": "Train: 6 [ 15/16 (100%)] Loss: 7.142 (7.03) Time: 0.368s, 694.85/s (0.443s, 577.67/s) LR: 9.990e-02 Data: 0.000 (0.076)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/16] Time: 0.994 (0.994) Loss: 6.8032 (6.8032) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.230) Loss: 6.4867 (6.8581) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.2112)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32086.375, 81920.0], "load": 1.0, "temperature": 71, "power": 98.12}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 696.2036233078143, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32570.375, 81920.0], "load": 0.77, "temperature": 70, "power": 96.341}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.864059925079346}, "pipe": "data"}
+{"event": "line", "data": "Train: 7 [ 0/16 ( 0%)] Loss: 6.864 (6.86) Time: 1.400s, 182.84/s (1.400s, 182.84/s) LR: 9.987e-02 Data: 1.034 (1.034)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33054.375, 81920.0], "load": 0.99, "temperature": 73, "power": 252.497}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 536.2713023667408, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.928898811340332}, "pipe": "data"}
+{"event": "data", "data": {"rate": 684.5163674031223, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.994691848754883}, "pipe": "data"}
+{"event": "data", "data": {"rate": 633.0877114856806, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.032052516937256}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33054.375, 81920.0], "load": 0.99, "temperature": 72, "power": 305.566}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 695.4763387217931, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0799455642700195}, "pipe": "data"}
+{"event": "data", "data": {"rate": 592.7090214989917, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.042294025421143}, "pipe": "data"}
+{"event": "line", "data": "Train: 7 [ 15/16 (100%)] Loss: 7.042 (6.99) Time: 0.368s, 696.07/s (0.443s, 577.81/s) LR: 9.987e-02 Data: 0.000 (0.076)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 624.5549192316796, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/16] Time: 0.966 (0.966) Loss: 6.8256 (6.8256) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.234) Loss: 6.5559 (6.8334) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 0.9448)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33054.375, 81920.0], "load": 0, "temperature": 70, "power": 96.828}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 582.3516158983447, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33538.375, 81920.0], "load": 0, "temperature": 73, "power": 96.145}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.889812469482422}, "pipe": "data"}
+{"event": "line", "data": "Train: 8 [ 0/16 ( 0%)] Loss: 6.890 (6.89) Time: 1.427s, 179.42/s (1.427s, 179.42/s) LR: 9.982e-02 Data: 1.059 (1.059)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34022.375, 81920.0], "load": 0.92, "temperature": 73, "power": 302.043}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.915931701660156}, "pipe": "data"}
+{"event": "data", "data": {"rate": 614.8461687789592, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.943663597106934}, "pipe": "data"}
+{"event": "data", "data": {"rate": 617.023210998383, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.988678932189941}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34022.375, 81920.0], "load": 0.93, "temperature": 73, "power": 296.011}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 616.8598450954569, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94257116317749}, "pipe": "data"}
+{"event": "data", "data": {"rate": 619.0919263451244, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.018918037414551}, "pipe": "data"}
+{"event": "line", "data": "Train: 8 [ 15/16 (100%)] Loss: 7.019 (6.94) Time: 0.368s, 695.98/s (0.445s, 574.76/s) LR: 9.982e-02 Data: 0.000 (0.078)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 625.3929211193839, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/16] Time: 0.979 (0.979) Loss: 6.8033 (6.8033) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.228) Loss: 6.6473 (6.8228) Acc@1: 0.0000 ( 0.2907) Acc@5: 0.0000 ( 1.0174)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152.D0/20240402-171801-resnet152-224/checkpoint-8.pth.tar', 0.29069767441860467)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34022.375, 81920.0], "load": 0, "temperature": 70, "power": 96.536}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 677.5896069972074, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34506.375, 81920.0], "load": 0.95, "temperature": 73, "power": 201.862}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.901410102844238}, "pipe": "data"}
+{"event": "line", "data": "Train: 9 [ 0/16 ( 0%)] Loss: 6.901 (6.90) Time: 1.391s, 183.99/s (1.391s, 183.99/s) LR: 9.978e-02 Data: 1.025 (1.025)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34990.375, 81920.0], "load": 0.99, "temperature": 72, "power": 148.201}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.856259822845459}, "pipe": "data"}
+{"event": "data", "data": {"rate": 617.6672239126855, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.910282135009766}, "pipe": "data"}
+{"event": "data", "data": {"rate": 618.1281290417545, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.907938003540039}, "pipe": "data"}
+{"event": "data", "data": {"rate": 616.6640415754832, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34990.375, 81920.0], "load": 0.99, "temperature": 73, "power": 238.836}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.029355049133301}, "pipe": "data"}
+{"event": "data", "data": {"rate": 620.4166658224798, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.980498313903809}, "pipe": "data"}
+{"event": "line", "data": "Train: 9 [ 15/16 (100%)] Loss: 6.980 (6.92) Time: 0.369s, 694.44/s (0.443s, 578.30/s) LR: 9.978e-02 Data: 0.000 (0.075)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 624.281044459124, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/16] Time: 0.979 (0.979) Loss: 6.7888 (6.7888) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.231) Loss: 6.4438 (6.8076) Acc@1: 0.0000 ( 0.3391) Acc@5: 3.1250 ( 1.2355)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152.D0/20240402-171801-resnet152-224/checkpoint-9.pth.tar', 0.3391472868217054)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34990.375, 81920.0], "load": 0, "temperature": 70, "power": 95.84}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 654.9093100646307, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35474.375, 81920.0], "load": 0.89, "temperature": 74, "power": 305.818}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.832240104675293}, "pipe": "data"}
+{"event": "line", "data": "Train: 10 [ 0/16 ( 0%)] Loss: 6.832 (6.83) Time: 1.443s, 177.44/s (1.443s, 177.44/s) LR: 9.973e-02 Data: 1.075 (1.075)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35958.375, 81920.0], "load": 1.0, "temperature": 73, "power": 305.448}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 696.1347333752168, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.885162830352783}, "pipe": "data"}
+{"event": "data", "data": {"rate": 590.2609266641402, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.871856689453125}, "pipe": "data"}
+{"event": "data", "data": {"rate": 616.5251300640239, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.896815299987793}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35958.375, 81920.0], "load": 0.99, "temperature": 73, "power": 191.682}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 616.4410149950778, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "stop", "data": null, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712078408.1757667, "return_code": -15}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet50.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet50.D0.data
new file mode 100644
index 000000000..bb6c1c2fb
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet50.D0.data
@@ -0,0 +1,2207 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "resnet50", "--batch-size": 64}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet50", "tag": ["resnet50", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 72, "power": 103.998, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077415.156174, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712077415.17404}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0189208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.141357421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1485595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07318115234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0589599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.13543701171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06268310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1090087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1446533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08746337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1492919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0975341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.11920166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0943603515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06915283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.185791015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.11456298828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.11767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02716064453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.13958740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.994384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99810791015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9683837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03021240234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0460205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.11968994140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06439208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.8, "temperature": 71, "power": 248.614}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01129150390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03814697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.12310791015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9730224609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06427001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89617919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.12481689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9935302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02587890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92803955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9180908203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92852783203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.14599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06494140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0045166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0550537109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.053955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08026123046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0406494140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06060791015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01177978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9913330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.995849609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0933837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.084716796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04132080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97540283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.11065673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03277587890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.925048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0076904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.011474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0970458984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.915283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90789794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86712646484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.74407958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85906982421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98822021484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88079833984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90875244140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.962646484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.79, "temperature": 71, "power": 189.197}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9716796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84259033203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95147705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.812744140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99066162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.760498046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9683837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0108642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94647216796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.956298828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00640869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90313720703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94976806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.985595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06951904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92645263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99237060546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1074.3753222434734, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92181396484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00555419921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96148681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99151611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0391845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9122314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94342041015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.933837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.001708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89166259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9281005859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9161376953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05157470703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1064.3505246204525, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95526123046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0093994140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.929931640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99737548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.005615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9273681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.965576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.14642333984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.76, "temperature": 71, "power": 169.166}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92083740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9735107421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01495361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0101318359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.966552734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1100.7928174952224, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.72662353515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79376220703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80889892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8753662109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86322021484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7818603515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94903564453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01275634765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81646728515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 749.784248703782, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.992919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88751220703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83465576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99383544921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.907958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92449951171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938232421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8594970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.906982421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8302001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97406005859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9434814453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95294189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02996826171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1075.1792667489865, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98907470703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92755126953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8988037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9034423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93115234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83819580078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86309814453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.954345703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.8, "temperature": 72, "power": 302.831}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9886474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9158935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06494140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.961669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9371337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.976806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1068.9057013433771, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96405029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90570068359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00738525390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96612548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94903564453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0523681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9747314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95806884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.049560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98590087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9737548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.819580078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03076171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95794677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0809326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97479248046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.883544921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98468017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1092.4736621669952, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9923095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.825927734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85980224609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.786376953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82373046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.850341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79498291015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.75146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84417724609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 742.0001887308397, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7880859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83636474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87469482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95977783203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9111328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96307373046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87982177734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78558349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83172607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8668212890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.8, "temperature": 71, "power": 208.557}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84478759765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96240234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8812255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94085693359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1089.0248017421088, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03179931640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91180419921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88433837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0032958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.870849609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9293212890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97796630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91650390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93719482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00421142578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91351318359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98065185546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9639892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9403076171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.928466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0362548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1080.9651501465464, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99237060546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.949951171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0242919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9603271484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92755126953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.036865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8326416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.987060546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9608154296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9676513671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92315673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85491943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8707275390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84814453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1085.314641797184, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00640869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9918212890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05633544921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92144775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [4, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.75518798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.6, "temperature": 69, "power": 95.053}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7857666015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8743896484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.75701904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.76666259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 759.4980301884566, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.879638671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83795166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8651123046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88006591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90704345703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87725830078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86773681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8424072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98822021484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84716796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84649658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78570556640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8453369140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89691162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91192626953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1077.4718864361746, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88116455078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.052490234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7698974609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0291748046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.890380859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9866943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94219970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.936279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94866943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9876708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83306884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91876220703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0609130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0474853515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9068603515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1078.7447551130078, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.825439453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87408447265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.077392578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91717529296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.005859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93695068359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9571533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.78, "temperature": 72, "power": 240.996}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05889892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90765380859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9873046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03533935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.975341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.989990234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86065673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1070.141934764114, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.022705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99322509765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.998779296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91595458984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04693603515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00433349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.016357421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10699462890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [5, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.776611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93902587890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 777.8909364648911, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7945556640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83807373046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.908935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78363037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9781494140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.858154296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8026123046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88397216796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.968505859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.900634765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9215087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9244384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.74603271484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87750244140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.965087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8619384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1076.3290604725014, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01373291015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8697509765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8800048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07879638671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.872802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9736328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99176025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.79, "temperature": 72, "power": 285.977}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9207763671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9617919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9837646484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93804931640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8397216796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.818359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08831787109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1071.3704503553447, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88226318359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.978759765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01202392578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.994140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86712646484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9315185546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9251708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.035888671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9388427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9542236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91827392578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00018310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.949951171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90899658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9385986328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1072.7630317340265, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85308837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0299072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99151611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9407958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95916748046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8702392578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.14739990234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02166748046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95654296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [6, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83154296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1108.5237842026875, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.766845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7198486328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8890380859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8394775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88006591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8902587890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94049072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.8, "temperature": 72, "power": 263.396}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89520263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8848876953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85162353515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9150390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91583251953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88653564453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8624267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93023681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 848.6058564655643, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9425048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8919677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93402099609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9014892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98834228515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.906982421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9183349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.992431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87017822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07659912109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91363525390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95977783203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95745849609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99517822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8734130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96173095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1079.9318185840405, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91064453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9034423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79461669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9713134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88519287109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05010986328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.087646484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8721923828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97113037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0235595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.935302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88616943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93988037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92608642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86358642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1086.0219293641771, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.975341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07244873046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8929443359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8892822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.883056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1773681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0374755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01177978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97216796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.82, "temperature": 72, "power": 305.679}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87158203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01385498046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95745849609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [7, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1112.3136179629682, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88031005859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7603759765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7989501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7025146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.896728515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91278076171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9241943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8050537109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7813720703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98284912109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.76763916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8951416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 772.6754224547993, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8736572265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.825927734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.848388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7177734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90838623046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85198974609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9759521484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04522705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.894287109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8397216796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9031982421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96685791015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92254638671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97930908203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.888427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1081.852181328063, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0098876953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87628173828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8970947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00665283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7503662109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04925537109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8714599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.73, "temperature": 72, "power": 186.388}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9698486328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96661376953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95611572265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00921630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99481201171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95672607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9947509765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01983642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1066.7693327564218, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97576904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9803466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99456787109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.953857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.923583984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9620361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.881103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10626220703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03509521484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.046142578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02752685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02606201171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.869873046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8306884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01446533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1102.1396729131875, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [8, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7694091796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90570068359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9017333984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8131103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.839599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8763427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7899169921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78131103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 730.0934192302099, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95062255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.838623046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0047607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9434814453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9041748046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7479248046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9053955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.79, "temperature": 73, "power": 194.123}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84368896484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92041015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97747802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87054443359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93328857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8746337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8553466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1068.9025120579545, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87518310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95941162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90655517578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9464111328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9544677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91143798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93121337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0428466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01019287109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97503662109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96258544921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0184326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85687255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89337158203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.944580078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87615966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1069.6564897382048, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99981689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91278076171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92169189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9564208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0125732421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93475341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79156494140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97900390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.011474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89605712890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96893310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95782470703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9952392578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92376708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86676025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.09295654296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8553466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1086.6782710455707, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.018798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04425048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.976806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9403076171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [9, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92926025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.37, "temperature": 70, "power": 96.312}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.6580810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79443359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8775634765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86627197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 728.4239562689767, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.77252197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.763671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80657958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.939697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94830322265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7374267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80682373046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.979248046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90826416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92901611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86505126953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94549560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9329833984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87249755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9117431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.937255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1082.5029515044914, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03314208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.873779296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9061279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0120849609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88226318359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00732421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.932373046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.956298828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.996337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8609619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.944091796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04974365234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92803955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1069.349988821429, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8800048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97467041015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88360595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.931640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91302490234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97784423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85040283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94573974609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87774658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.78, "temperature": 73, "power": 293.125}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97418212890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96795654296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04998779296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.832275390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08526611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1075.708238023532, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98223876953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.904541015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01593017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97027587890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95758056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.952880859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82525634765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9888916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [10, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7283935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1105.5787146080977, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8115234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.839111328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8455810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.73211669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.75860595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91790771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87811279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87445068359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86419677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8148193359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80560302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84576416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82525634765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90692138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88763427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98175048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 854.6778213161937, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9814453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92071533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00933837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79974365234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85479736328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96307373046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02642822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.76, "temperature": 72, "power": 297.708}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9974365234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83905029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91107177734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9456787109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00018310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91265869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84893798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86065673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1074.2054224177707, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92889404296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91302490234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99627685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85137939453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99835205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88165283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88055419921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85882568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94091796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8804931640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0657958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10986328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89898681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0123291015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9727783203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1069.3931295326697, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.925048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.923583984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86016845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85345458984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92242431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.918212890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87127685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [11, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.75531005859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1100.7683618512228, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7322998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83526611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84112548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.03, "temperature": 72, "power": 302.852}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91552734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95172119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84033203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.825439453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7950439453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9066162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9088134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.931396484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 800.9970210169854, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83197021484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00299072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85174560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9654541015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85888671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94476318359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9080810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8575439453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78045654296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8883056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88189697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91107177734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.902099609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.930908203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87200927734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1074.5376454382877, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82464599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02545166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00091552734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94940185546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8238525390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96783447265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98394775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90972900390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9095458984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93463134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88482666015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9969482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05096435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86676025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95098876953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1073.7104016430374, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9444580078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8795166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01123046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.912353515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99859619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9552001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.74, "temperature": 73, "power": 307.607}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02886962890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97113037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83154296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90521240234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91668701171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02362060546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [12, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1098.5822783581073, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82745361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82025146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79949951171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.865966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83087158203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.925537109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82574462890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.767822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86492919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94927978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 752.1941901190678, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8480224609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.833984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87884521484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.030029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86968994140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88458251953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9100341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87530517578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88140869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.922119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.949462890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.877685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94097900390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9276123046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1082.3164629453127, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91339111328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8748779296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8927001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90081787109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98779296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8587646484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84796142578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.74, "temperature": 73, "power": 295.625}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06488037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99530029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.909423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91583251953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.897705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95391845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.896728515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1070.5691999764088, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9476318359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9757080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87677001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.945068359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0169677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94317626953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00152587890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94317626953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9803466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97845458984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9322509765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96636962890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89862060546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.971435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9573974609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99713134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0126953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1094.725606594049, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89569091796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06988525390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [13, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89239501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8389892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.819091796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.849609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93353271484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94427490234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 734.9186436066431, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79571533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8326416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82928466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87353515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87469482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89910888671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.74, "temperature": 73, "power": 212.224}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7947998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7410888671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01068115234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86041259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.851318359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98089599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82415771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0321044921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1060.5722638745788, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8673095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.962158203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8050537109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.894775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80596923828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.934326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9671630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88690185546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9774169921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.021484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88018798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88262939453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.898193359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1063.7712180327794, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9637451171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95635986328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90606689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.985107421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92730712890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9393310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87530517578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99542236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9061279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8958740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95416259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89166259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86102294921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89556884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0406494140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0345458984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93426513671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1092.0524242951435, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.036865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95068359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.993408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99908447265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.879638671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [14, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.48, "temperature": 70, "power": 309.283}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80560302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8131103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79180908203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.884033203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 746.7580365228025, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83612060546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84588623046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.921630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00128173828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97076416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94549560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.77191162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8509521484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7977294921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04913330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8720703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.922119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8463134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.11358642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8544921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1081.391555645523, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8697509765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8599853515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9713134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.028076171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89837646484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9468994140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01458740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02850341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91253662109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83660888671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79998779296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88018798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8387451171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02410888671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1071.262253603257, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9302978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97637939453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93865966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87896728515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86175537109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9219970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.75, "temperature": 72, "power": 187.62}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95196533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97454833984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.097412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03302001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93475341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9525146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87872314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8885498046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1091.9746676731168, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79437255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98779296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91729736328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9586181640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87786865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.827880859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02691650390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05718994140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [15, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84033203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1105.9526522522765, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78076171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.77789306640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.74603271484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89959716796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87744140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.72613525390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79461669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89373779296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91949462890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.822998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.75787353515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83685302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89617919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80035400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9439697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93365478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 844.5575533909964, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85809326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8887939453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01885986328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85614013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02154541015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95733642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.848876953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85498046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.8, "temperature": 72, "power": 302.931}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84747314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9488525390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91619873046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8489990234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00286865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89837646484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82684326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01019287109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1069.2062222243835, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92095947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93646240234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8726806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87744140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0084228515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8590087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97686767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94952392578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.973876953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8306884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.949462890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93267822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88104248046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0472412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 1070.4621792282105, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.79, "temperature": 73, "power": 307.934}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712077485.6945655, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/rwkv.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/rwkv.D0.data
new file mode 100644
index 000000000..a49ee583b
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/rwkv.D0.data
@@ -0,0 +1,462 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "rwkv", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv", "tags": ["llm", "rnn", "unsupported-rocm"], "plan": {"method": "per_gpu"}, "argv": {"--data_type": "dummy", "--ctx_len": 128, "--epoch_steps": 1000, "--epoch_count": 20, "--epoch_begin": 0, "--epoch_save": 0, "--micro_bsz": 16, "--n_layer": 12, "--n_embd": 768, "--pre_ffn": 0, "--head_qk": 0, "--lr_init": "6e-4", "--lr_final": "1e-5", "--warmup_steps": 0, "--beta1": 0.9, "--beta2": 0.99, "--adam_eps": "1e-8", "--accelerator": "gpu", "--devices": 1, "--precision": "tf32", "--strategy": "ddp_find_unused_parameters_false", "--grad_cp": 0, "--random_seed": 1234, "--enable_progress_bar": "False"}, "weight": 1.0, "name": "rwkv", "tag": ["rwkv", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 45, "power": 72.501, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712079323.433281, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712079323.4496007}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "[2024-04-02 17:35:25,680] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", "pipe": "stdout"}
+{"event": "line", "data": "########## work in progress ##########\n", "pipe": "stderr"}
+{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"}
+{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"}
+{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"}
+{"event": "line", "data": "\n", "pipe": "stdout"}
+{"event": "line", "data": "Global seed set to 1234\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "############################################################################\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# RWKV-4 TF32 on 1x1 GPU, bsz 1x1x16=16, ddp_find_unused_parameters_false \n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Data = (dummy), ProjDir = /Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Epoch = 0 to 19 (will continue afterwards), save every 0 epoch\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Each \"epoch\" = 1000 steps, 16000 samples, 2048000 tokens\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Model = 12 n_layer, 768 n_embd, 128 ctx_len\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Adam = lr 0.0006 to 1e-05, warmup 0 steps, beta (0.9, 0.99), eps 1e-08\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Found torch 2.1.0+cu118, recommend 1.13.1+cu117 or newer\n", "pipe": "stderr"}
+{"event": "line", "data": "# Found deepspeed 0.12.2, recommend 0.7.0 (faster than newer versions)\n", "pipe": "stderr"}
+{"event": "line", "data": "# Found pytorch_lightning 1.9.5, recommend 1.9.1 or newer\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "############################################################################\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "{'load_model': '', 'wandb': '', 'proj_dir': '/Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/', 'random_seed': 1234, 'data_file': '', 'data_type': 'dummy', 'vocab_size': 0, 'ctx_len': 128, 'epoch_steps': 1000, 'epoch_count': 20, 'epoch_begin': 0, 'epoch_save': 0, 'micro_bsz': 16, 'n_layer': 12, 'n_embd': 768, 'dim_att': 768, 'dim_ffn': 3072, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 0.0006, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_version': 1, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': False, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'ddp_find_unused_parameters_false', 'sync_batchnorm': False, 'precision': 'tf32', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-04-02-17-35-27', 'betas': (0.9, 0.99), 'real_bsz': 16, 'run_name': '0 ctx128 L12 D768'}\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "Building dummy data...\n", "pipe": "stderr"}
+{"event": "line", "data": "Building token list...\n", "pipe": "stderr"}
+{"event": "line", "data": "Data has 1620950 tokens, 13 vocab size.\n", "pipe": "stderr"}
+{"event": "line", "data": "RWKV_MY_TESTING \n", "pipe": "stdout"}
+{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"}
+{"event": "line", "data": "Detected CUDA files, patching ldflags\n", "pipe": "stderr"}
+{"event": "line", "data": "Emitting ninja build file /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/wkv_128/build.ninja...\n", "pipe": "stderr"}
+{"event": "line", "data": "Building extension module wkv_128...\n", "pipe": "stderr"}
+{"event": "line", "data": "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "pipe": "stderr"}
+{"event": "line", "data": "ninja: no work to do.\n", "pipe": "stdout"}
+{"event": "line", "data": "Loading extension module wkv_128...\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stdout"}
+{"event": "line", "data": "############################################################################\n", "pipe": "stdout"}
+{"event": "line", "data": "#\n", "pipe": "stdout"}
+{"event": "line", "data": "# Init model weight (slow for large models)...\n", "pipe": "stdout"}
+{"event": "line", "data": "#\n", "pipe": "stdout"}
+{"event": "line", "data": "############################################################################\n", "pipe": "stdout"}
+{"event": "line", "data": "\n", "pipe": "stdout"}
+{"event": "line", "data": "13 768 -0.0006 emb.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.0.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.0.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.0.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.0.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.0.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.0.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.1.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.1.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.1.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.1.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.1.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.1.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.2.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.2.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.2.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.2.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.2.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.2.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.3.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.3.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.3.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.3.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.3.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.3.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.4.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.4.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.4.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.4.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.4.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.4.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.5.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.5.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.5.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.5.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.5.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.5.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.6.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.6.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.6.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.6.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.6.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.6.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.7.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.7.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.7.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.7.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.7.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.7.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.8.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.8.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.8.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.8.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.8.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.8.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.9.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.9.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.9.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.9.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.9.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.9.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.10.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.10.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.10.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.10.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.10.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.10.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.11.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.11.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.11.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.11.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.11.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.11.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "13 768 0.5 head.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "GPU available: True (cuda), used: True\n", "pipe": "stderr"}
+{"event": "line", "data": "TPU available: False, using: 0 TPU cores\n", "pipe": "stderr"}
+{"event": "line", "data": "IPU available: False, using: 0 IPUs\n", "pipe": "stderr"}
+{"event": "line", "data": "HPU available: False, using: 0 HPUs\n", "pipe": "stderr"}
+{"event": "line", "data": "13 768 emb.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln0.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln0.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.0.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.0.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.0.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.0.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.0.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.0.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.1.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.1.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.1.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.1.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.1.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.1.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.2.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.2.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.2.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.2.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.2.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.2.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.3.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.3.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.3.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.3.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.3.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.3.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.4.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.4.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.4.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.4.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.4.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.4.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.5.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.5.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.5.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.5.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.5.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.5.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.6.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.6.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.6.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.6.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.6.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.6.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.7.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.7.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.7.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.7.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.7.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.7.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.8.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.8.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.8.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.8.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.8.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.8.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.9.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.9.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.9.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.9.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.9.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.9.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.10.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.10.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.10.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.10.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.10.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.10.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.11.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.11.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.11.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.11.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.11.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.11.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 ln_out.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 ln_out.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "13 768 head.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "[rank: 0] Global seed set to 1234\n", "pipe": "stderr"}
+{"event": "line", "data": "Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1\n", "pipe": "stderr"}
+{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"}
+{"event": "line", "data": "distributed_backend=nccl\n", "pipe": "stderr"}
+{"event": "line", "data": "All distributed processes registered. Starting with 1 processes\n", "pipe": "stderr"}
+{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", "pipe": "stderr"}
+{"event": "line", "data": "Installed CUDA version 11.5 does not match the version torch was compiled with 11.8 but since the APIs are compatible, accepting this combination\n", "pipe": "stdout"}
+{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"}
+{"event": "line", "data": "Detected CUDA files, patching ldflags\n", "pipe": "stderr"}
+{"event": "line", "data": "Emitting ninja build file /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/fused_adam/build.ninja...\n", "pipe": "stderr"}
+{"event": "line", "data": "Building extension module fused_adam...\n", "pipe": "stderr"}
+{"event": "line", "data": "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "pipe": "stderr"}
+{"event": "line", "data": "[1/2] /usr/bin/nvcc -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/includes -I/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -lineinfo --use_fast_math -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -std=c++17 -c /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o \n", "pipe": "stdout"}
+{"event": "line", "data": "FAILED: multi_tensor_adam.cuda.o \n", "pipe": "stdout"}
+{"event": "line", "data": "/usr/bin/nvcc -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/includes -I/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -lineinfo --use_fast_math -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -std=c++17 -c /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o \n", "pipe": "stdout"}
+{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:435:145: error: parameter packs not expanded with \u2018...\u2019:\n", "pipe": "stdout"}
+{"event": "line", "data": " 435 | function(_Functor&& __f)\n", "pipe": "stdout"}
+{"event": "line", "data": " | ^ \n", "pipe": "stdout"}
+{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:435:145: note: \u2018_ArgTypes\u2019\n", "pipe": "stdout"}
+{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:530:146: error: parameter packs not expanded with \u2018...\u2019:\n", "pipe": "stdout"}
+{"event": "line", "data": " 530 | operator=(_Functor&& __f)\n", "pipe": "stdout"}
+{"event": "line", "data": " | ^ \n", "pipe": "stdout"}
+{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:530:146: note: \u2018_ArgTypes\u2019\n", "pipe": "stdout"}
+{"event": "line", "data": "ninja: build stopped: subcommand failed.\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1574.375, 81920.0], "load": 0.02, "temperature": 45, "power": 69.155}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 44, "power": 48.303}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 45, "power": 68.666}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 45, "power": 68.472}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 45, "power": 68.459}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 45, "power": 68.667}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 45, "power": 68.667}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 45, "power": 68.569}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 45, "power": 68.667}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "RuntimeError", "message": "Error building extension 'fused_adam'"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 2100, in _run_ninja_build\n", "pipe": "stderr"}
+{"event": "line", "data": " subprocess.run(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/subprocess.py\", line 526, in run\n", "pipe": "stderr"}
+{"event": "line", "data": " raise CalledProcessError(retcode, process.args,\n", "pipe": "stderr"}
+{"event": "line", "data": "subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "The above exception was the direct cause of the following exception:\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py\", line 420, in \n", "pipe": "stderr"}
+{"event": "line", "data": " trainer.fit(model, data_loader)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 608, in fit\n", "pipe": "stderr"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": " call._call_and_handle_interrupt(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py\", line 36, in _call_and_handle_interrupt\n", "pipe": "stderr"}
+{"event": "line", "data": " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py\", line 88, in launch\n", "pipe": "stderr"}
+{"event": "line", "data": " return function(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 650, in _fit_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(model, ckpt_path=self.ckpt_path)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1093, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " self.strategy.setup(self)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/ddp.py\", line 181, in setup\n", "pipe": "stderr"}
+{"event": "line", "data": " self.setup_optimizers(trainer)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py\", line 142, in setup_optimizers\n", "pipe": "stderr"}
+{"event": "line", "data": " self.optimizers, self.lr_scheduler_configs, self.optimizer_frequencies = _init_optimizers_and_lr_schedulers(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/core/optimizer.py\", line 180, in _init_optimizers_and_lr_schedulers\n", "pipe": "stderr"}
+{"event": "line", "data": " optim_conf = model.trainer._call_lightning_module_hook(\"configure_optimizers\", pl_module=model)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1356, in _call_lightning_module_hook\n", "pipe": "stderr"}
+{"event": "line", "data": " output = fn(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/src/model.py\", line 606, in configure_optimizers\n", "pipe": "stderr"}
+{"event": "line", "data": " return FusedAdam(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py\", line 94, in __init__\n", "pipe": "stderr"}
+{"event": "line", "data": " fused_adam_cuda = FusedAdamBuilder().load()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 452, in load\n", "pipe": "stderr"}
+{"event": "line", "data": " return self.jit_load(verbose)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 501, in jit_load\n", "pipe": "stderr"}
+{"event": "line", "data": " op_module = load(name=self.name,\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1308, in load\n", "pipe": "stderr"}
+{"event": "line", "data": " return _jit_compile(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1710, in _jit_compile\n", "pipe": "stderr"}
+{"event": "line", "data": " _write_ninja_file_and_build_library(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1823, in _write_ninja_file_and_build_library\n", "pipe": "stderr"}
+{"event": "line", "data": " _run_ninja_build(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 2116, in _run_ninja_build\n", "pipe": "stderr"}
+{"event": "line", "data": " raise RuntimeError(message) from e\n", "pipe": "stderr"}
+{"event": "line", "data": "RuntimeError: Error building extension 'fused_adam'\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712079350.9924312, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/stargan.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/stargan.D0.data
new file mode 100644
index 000000000..07f49a8e6
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/stargan.D0.data
@@ -0,0 +1,682 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "stargan", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "tags": ["gan", "resnet", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan", "plan": {"method": "per_gpu"}, "argv": {"--image_size": 512, "--c_dim": 5, "--batch_size": 16}, "weight": 1.0, "name": "stargan", "tag": ["stargan", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 26208.375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.31992645263671876}, "temperature": 73, "power": 101.22, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078897.253788, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712078897.2705288}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Namespace(c_dim=5, c2_dim=8, celeba_crop_size=178, rafd_crop_size=256, image_size=512, g_conv_dim=64, d_conv_dim=64, g_repeat_num=6, d_repeat_num=6, lambda_cls=1, lambda_rec=10, lambda_gp=10, dataset='synth', batch_size=16, num_iters=200000, num_iters_decay=100000, g_lr=0.0001, d_lr=0.0001, n_critic=5, beta1=0.5, beta2=0.999, resume_iters=None, selected_attrs=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'], test_iters=200000, num_workers=1, mode='train', use_tensorboard=False, celeba_image_dir='data/celeba/images', attr_path='data/celeba/list_attr_celeba.txt', rafd_image_dir='data/RaFD/train', log_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/logs', model_save_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/models', sample_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/samples', result_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/results', log_step=10, sample_step=1000, model_save_step=10000, lr_update_step=1000)\n", "pipe": "stdout"}
+{"event": "line", "data": "Generator(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(8, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (5): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (6): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (7): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (8): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (9): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (10): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (11): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (12): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (13): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (14): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (15): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (16): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (17): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (18): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (19): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (20): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (21): Conv2d(64, 3, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (22): Tanh()\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": ")\n", "pipe": "stdout"}
+{"event": "line", "data": "G\n", "pipe": "stdout"}
+{"event": "line", "data": "The number of parameters: 8430528\n", "pipe": "stdout"}
+{"event": "line", "data": "Discriminator(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (5): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (6): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (7): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (8): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (9): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (10): Conv2d(1024, 2048, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (11): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (conv1): Conv2d(2048, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (conv2): Conv2d(2048, 5, kernel_size=(8, 8), stride=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": ")\n", "pipe": "stdout"}
+{"event": "line", "data": "D\n", "pipe": "stdout"}
+{"event": "line", "data": "The number of parameters: 45376448\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"}
+{"event": "line", "data": "Start training...\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"}
+{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(warning.format(ret))\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 13.089741706848145}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [53428.375, 81920.0], "load": 1.0, "temperature": 70, "power": 100.303}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [19484.375, 81920.0], "load": 1.0, "temperature": 71, "power": 139.052}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.317051887512207}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.644302368164062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.848727703094482}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 8.103803739898403, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.238846778869629}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 62.85356381808514, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29784.375, 81920.0], "load": 0.99, "temperature": 75, "power": 259.957}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.218193292617798}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 6.441458616433847, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31678.375, 81920.0], "load": 1.0, "temperature": 73, "power": 330.939}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.8139472007751465}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.672729730606079}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.4927239418029785}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:00:12], Iteration [10/200000], D/loss_real: -1.2637, D/loss_fake: 0.0986, D/loss_cls: 3.2617, D/loss_gp: 0.0396, G/loss_fake: -0.8299, G/loss_rec: 0.5488, G/loss_cls: 3.4044\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 44.362861183804156, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.7979466915130615}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.576117515563965}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.983277797698975}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.448564052581787}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.224003791809082}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.55333479758454, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36418.375, 81920.0], "load": 1.0, "temperature": 75, "power": 297.802}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98637580871582}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.404041767120361}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 26.85794672508046, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.861074924468994}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.728839635848999}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.7439486980438232}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:00:16], Iteration [20/200000], D/loss_real: 0.0041, D/loss_fake: -0.3487, D/loss_cls: 3.3985, D/loss_gp: 0.0690, G/loss_fake: 0.3632, G/loss_rec: 0.5314, G/loss_cls: 3.4477\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 44.500532049751826, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.070382118225098}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 8.543978691101074}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.056460380554199}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.90916633605957}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.284873962402344}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.51226369361384, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36418.375, 81920.0], "load": 0.96, "temperature": 75, "power": 301.146}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.841668128967285}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.380107402801514}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 26.784126623726365, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.945100784301758}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.168574333190918}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.5670511722564697}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:00:19], Iteration [30/200000], D/loss_real: -2.5096, D/loss_fake: 1.2898, D/loss_cls: 3.4684, D/loss_gp: 0.0318, G/loss_fake: -1.3278, G/loss_rec: 0.5435, G/loss_cls: 3.3680\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 44.74442549058152, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.532721757888794}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.50101637840271}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.7967890501022339}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36418.375, 81920.0], "load": 0.92, "temperature": 76, "power": 252.409}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.220597505569458}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.7649186253547668}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.3234911378094, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.5370545387268066}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.27542781829834}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 26.885388307150418, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.046738386154175}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.9461417198181152}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.2733052968978882}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:00:23], Iteration [40/200000], D/loss_real: -4.1574, D/loss_fake: 0.3679, D/loss_cls: 3.4969, D/loss_gp: 0.0566, G/loss_fake: -0.2052, G/loss_rec: 0.5576, G/loss_cls: 3.4414\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 43.95507960091843, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.7831344604492188}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 76, "power": 277.74}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.49488839507102966}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.29238027334213257}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.019767314195632935}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": -0.31068718433380127}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.34541434629127, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.8902745842933655}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.2322750091552734}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 27.916079293093336, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.40593504905700684}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.4870221018791199}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.7905704975128174}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:00:26], Iteration [50/200000], D/loss_real: -5.1061, D/loss_fake: 1.1584, D/loss_cls: 4.3119, D/loss_gp: 0.0426, G/loss_fake: -0.2823, G/loss_rec: 0.5366, G/loss_cls: 3.4462", "pipe": "stdout"}
+{"event": "line", "data": "\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 42.69129624313872, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.5668803453445435}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 77, "power": 296.287}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.9807621240615845}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.9384143352508545}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.9174190163612366}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.8964967727661133}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.43898717115566, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.0413246154785156}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.9203179478645325}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 26.873926246503125, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.6606730222702026}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.5495665073394775}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.4471652507781982}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:00:30], Iteration [60/200000], D/loss_real: -4.0746, D/loss_fake: 1.6119, D/loss_cls: 3.8760, D/loss_gp: 0.0034, G/loss_fake: -2.0161, G/loss_rec: 0.5201, G/loss_cls: 3.4230\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.99, "temperature": 76, "power": 358.845}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.71429917315668, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.663923978805542}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.712101936340332}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.4543116092681885}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.023350238800049}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [65, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.411146640777588}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.334419212054094, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [66, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.6416218280792236}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [67, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.8717973232269287}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 28.885113111151245, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [68, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.103079080581665}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [69, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.2871930599212646}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 76, "power": 187.765}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [70, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.1995863914489746}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:00:33], Iteration [70/200000], D/loss_real: -6.7271, D/loss_fake: 3.5354, D/loss_cls: 3.5678, D/loss_gp: 0.0823, G/loss_fake: -2.5282, G/loss_rec: 0.5270, G/loss_cls: 3.6515\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 40.960157776343216, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [71, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.961525797843933}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [72, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.2567733526229858}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [73, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.284398078918457}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [74, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.349828839302063}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [75, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.7634912133216858}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.24667062950582, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [76, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.7425315380096436}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [77, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.935045838356018}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 76, "power": 319.931}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 26.710782283722406, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [78, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.51625657081604}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [79, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.3935335874557495}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [80, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.3159737586975098}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:00:37], Iteration [80/200000], D/loss_real: -5.4053, D/loss_fake: 3.2105, D/loss_cls: 3.3761, D/loss_gp: 0.0135, G/loss_fake: -3.0750, G/loss_rec: 0.5277, G/loss_cls: 3.4031\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 44.716048480804396, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [81, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.3885138034820557}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [82, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.4682352542877197}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [83, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.1441540718078613}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [84, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.0262908935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [85, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.9325515031814575}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.360431991072, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 77, "power": 305.725}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [86, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.0223560333251953}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [87, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.106226921081543}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 26.722320865767585, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [88, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.6552891731262207}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [89, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.4856133460998535}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [90, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.3280460834503174}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:00:40], Iteration [90/200000], D/loss_real: -4.9272, D/loss_fake: 3.9632, D/loss_cls: 3.2878, D/loss_gp: 0.0004, G/loss_fake: -4.3415, G/loss_rec: 0.5538, G/loss_cls: 3.3516\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 44.507487157049326, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [91, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.2114596366882324}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [92, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.4217722415924072}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [93, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.5507781505584717}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [94, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.2502453327178955}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [95, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.983329176902771}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 77, "power": 368.945}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.34861032783197, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [96, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.389871835708618}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [97, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.196514129638672}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 26.721445228563542, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [98, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.0577573776245117}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [99, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.667776346206665}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [100, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.360865592956543}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:00:44], Iteration [100/200000], D/loss_real: -2.1996, D/loss_fake: 1.3916, D/loss_cls: 3.3000, D/loss_gp: 0.0869, G/loss_fake: -1.3780, G/loss_rec: 0.5768, G/loss_cls: 3.3806\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 44.83059187586123, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [101, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.132774829864502}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [102, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.1603212356567383}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [103, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.7224862575531006}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.98, "temperature": 78, "power": 209.342}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [104, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.6672191619873047}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [105, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.721295118331909}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.3705876718548, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [106, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.4580485820770264}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [107, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.0790822505950928}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 26.789347209719587, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [108, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.9940168857574463}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [109, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.9327802658081055}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [110, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.5489604473114014}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:00:47], Iteration [110/200000], D/loss_real: -5.2043, D/loss_fake: 4.3399, D/loss_cls: 3.2522, D/loss_gp: 0.0161, G/loss_fake: -3.4171, G/loss_rec: 0.5713, G/loss_cls: 3.4308", "pipe": "stdout"}
+{"event": "line", "data": "\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 44.06869220986059, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [111, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.1736793518066406}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 78, "power": 300.578}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [112, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.9872524738311768}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [113, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.7024011611938477}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [114, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.045640468597412}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [115, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.2699198722839355}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.152547311039434, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [116, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.930947780609131}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [117, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.991608738899231}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 28.867702753537163, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [118, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.7082041501998901}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [119, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.5778729915618896}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [120, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.1663765907287598}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:00:51], Iteration [120/200000], D/loss_real: -2.5564, D/loss_fake: 1.0542, D/loss_cls: 3.2038, D/loss_gp: 0.0465, G/loss_fake: -2.4015, G/loss_rec: 0.5582, G/loss_cls: 3.3508\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 40.38467153484138, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [121, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.566779613494873}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.92, "temperature": 78, "power": 281.405}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [122, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.849420547485352}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [123, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.100782632827759}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [124, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.943386077880859}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [125, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.957638263702393}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.237947984535445, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [126, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.743450164794922}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [127, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 8.329449653625488}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 26.856155305956968, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [128, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.75643539428711}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [129, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.2974271774292}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [130, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.21634578704834}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:00:54], Iteration [130/200000], D/loss_real: -0.2338, D/loss_fake: 0.1474, D/loss_cls: 3.4953, D/loss_gp: 0.6808, G/loss_fake: -0.1021, G/loss_rec: 0.5332, G/loss_cls: 3.4595\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.98, "temperature": 77, "power": 350.66}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.949590570421236, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [131, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.263629913330078}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [132, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.704811096191406}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [133, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.049165725708008}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [134, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.001199722290039}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [135, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 20.01687240600586}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.505042943657934, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [136, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.680143356323242}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [137, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.48331356048584}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 26.890739919507194, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [138, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 12.309514999389648}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [139, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.884629249572754}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.92, "temperature": 77, "power": 161.811}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [140, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 11.30716323852539}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:00:58], Iteration [140/200000], D/loss_real: 0.1322, D/loss_fake: -0.1191, D/loss_cls: 3.3466, D/loss_gp: 0.7947, G/loss_fake: 0.1972, G/loss_rec: 0.5142, G/loss_cls: 3.3484\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 43.9498588585992, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [141, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.482892990112305}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [142, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.06439208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [143, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.64170503616333}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [144, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.9685356616973877}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [145, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.367649078369141}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.37724130191398, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [146, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.470202922821045}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 78, "power": 305.085}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [147, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.5152342319488525}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 28.949879568365297, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [148, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.2401461601257324}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [149, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.1975464820861816}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [150, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.218050956726074}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:01:01], Iteration [150/200000], D/loss_real: -1.2048, D/loss_fake: 0.3205, D/loss_cls: 3.3487, D/loss_gp: 0.1754, G/loss_fake: 0.2283, G/loss_rec: 0.5049, G/loss_cls: 3.3464\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 40.43206005989796, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [151, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.0931990146636963}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [152, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.79805326461792}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [153, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.6309213638305664}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [154, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.41560435295105}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [155, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.254915237426758}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.997666049807904, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 78, "power": 229.605}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [156, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.607158660888672}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [157, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.4929704666137695}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 28.827416051706482, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [158, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 8.434049606323242}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [159, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.508437633514404}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [160, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.6448564529418945}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:01:05], Iteration [160/200000], D/loss_real: -0.7107, D/loss_fake: 0.3651, D/loss_cls: 3.3345, D/loss_gp: 0.2656, G/loss_fake: -1.2273, G/loss_rec: 0.5132, G/loss_cls: 3.3570\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 40.57161709710732, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [161, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.5689890384674072}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [162, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.807031631469727}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [163, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.7881133556365967}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [164, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.6615917682647705}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [165, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.1059515476226807}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.99, "temperature": 77, "power": 164.373}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.18596391046428, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [166, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.761859893798828}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [167, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.50831937789917}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 26.774190096082005, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [168, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.4266552925109863}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [169, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.2906341552734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [170, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.1533851623535156}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:01:08], Iteration [170/200000], D/loss_real: -2.6321, D/loss_fake: 2.4288, D/loss_cls: 3.3430, D/loss_gp: 0.0014, G/loss_fake: -2.3668, G/loss_rec: 0.5078, G/loss_cls: 3.3512\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 44.10309255185025, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [171, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.6296677589416504}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [172, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.3898794651031494}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.91, "temperature": 78, "power": 349.589}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [173, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.0889828205108643}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [174, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.8158233165740967}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [175, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.5107460021972656}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.27341524044557, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [176, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.5628106594085693}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [177, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.2683067321777344}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 28.954062714644607, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [178, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.0418951511383057}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [179, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.7302507162094116}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [180, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.5594336986541748}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:01:12], Iteration [180/200000], D/loss_real: -3.0206, D/loss_fake: 1.0339, D/loss_cls: 3.3110, D/loss_gp: 0.0235, G/loss_fake: -0.6357, G/loss_rec: 0.5053, G/loss_cls: 3.3933\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 40.15355391589466, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [181, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.3772342205047607}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 78, "power": 287.055}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [182, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.388420581817627}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [183, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.4431064128875732}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [184, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.605264186859131}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [185, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.3124754428863525}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.34187725289228, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [186, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.582227945327759}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [187, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.6479787826538086}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 28.840951018318734, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [188, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.7377052307128906}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [189, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.453064441680908}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [190, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.4794130325317383}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:01:16], Iteration [190/200000], D/loss_real: -1.7670, D/loss_fake: 0.7271, D/loss_cls: 3.3752, D/loss_gp: 0.0144, G/loss_fake: -0.2988, G/loss_rec: 0.4972, G/loss_cls: 3.3753\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 40.185541971868425, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [191, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.106503486633301}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 78, "power": 309.893}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [192, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.14693546295166}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [193, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.9078567028045654}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [194, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.977015256881714}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [195, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.739900588989258}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.30069066735387, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [196, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.1128323078155518}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [197, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.965388059616089}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 28.856631387264535, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [198, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.1346616744995117}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [199, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.836603164672852}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [200, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.476594924926758}, "pipe": "data"}
+{"event": "line", "data": "Elapsed [0:01:19], Iteration [200/200000], D/loss_real: -0.1541, D/loss_fake: -0.0009, D/loss_cls: 3.2550, D/loss_gp: 0.1377, G/loss_fake: -1.0008, G/loss_rec: 0.4956, G/loss_cls: 3.3767\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "rate": 40.4359290694734, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 79, "power": 317.084}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 79, "power": 317.084}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712078980.6923656, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/super-slomo.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/super-slomo.D0.data
new file mode 100644
index 000000000..8b493eb4e
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/super-slomo.D0.data
@@ -0,0 +1,391 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "super-slomo", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "tags": ["convnet", "unet", "video-interpolation", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo", "plan": {"method": "per_gpu"}, "argv": {"--train_batch_size": 32}, "weight": 1.0, "name": "super-slomo", "tag": ["super-slomo", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 74, "power": 107.427, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078983.331632, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712078983.3483179}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"}
+{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(msg)\n", "pipe": "stderr"}
+{"event": "line", "data": "Epoch: 0\n", "pipe": "stdout"}
+{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:136: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1472.375, 81920.0], "load": 0, "temperature": 70, "power": 96.48}}}, "pipe": "data"}
+{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py:4296: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 328.47808837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.46881103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 74, "power": 285.378}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4638977050781}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4608459472656}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4584655761719}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4568786621094}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4557800292969}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.81, "temperature": 74, "power": 280.3}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.45513916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.45465087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4543762207031}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.23652711456088, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4541320800781}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.8, "temperature": 75, "power": 324.06}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.45391845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 38.224495820542586, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.45367431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.45343017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.36284911916846, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.45318603515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.45294189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 76, "power": 190.737}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.261178450602266, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.45263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 32.967581505926894, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.45233154296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4519958496094}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.73784474584244, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4516906738281}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.82, "temperature": 76, "power": 279.304}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.451416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.594516605978924, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4512023925781}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 39.28498117798238, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4510803222656}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4508972167969}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 38.10012373306887, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 76, "power": 283.518}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4505920410156}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4503479003906}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 39.86162470478901, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4500427246094}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4498291015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.27301512778587, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4496154785156}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.82, "temperature": 77, "power": 254.367}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 39.372754421091535, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.44940185546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4490966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 38.05545757018502, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.44879150390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4484558105469}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.02176004256121, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 75, "power": 167.074}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.44818115234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4478454589844}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.77278801272933, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.447509765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 36.070490218961865, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4471435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4467468261719}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 76, "power": 241.14}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.240082608292994, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4464111328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.446044921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.139318454450525, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.445556640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 39.10544352442072, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4450988769531}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 77, "power": 302.218}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4447326660156}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 37.97631533159207, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4442138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4437255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 42.25052577462, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4432373046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.44268798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.807311462365455, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 77, "power": 102.077}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.44256591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 35.98783215856734, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4415283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4410095214844}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.26664140773931, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4404296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.86, "temperature": 77, "power": 265.928}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.62171037120563, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4397888183594}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4390563964844}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 37.871864406161436, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.43829345703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 42.927191217804975, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4366455078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 78, "power": 278.754}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4357604980469}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.766062855286634, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.434814453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 36.057952936916436, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4338073730469}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4327697753906}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 38.72634373080457, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 77, "power": 143.281}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.430419921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.02601584300039, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.42913818359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.15053158548995, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4263916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.88, "temperature": 78, "power": 248.171}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 37.84231553301942, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [65, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.42498779296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [66, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4234924316406}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.403488232377484, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [67, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.93665569757023, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [68, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.42022705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 77, "power": 303.997}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [69, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4185485839844}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 37.889177465296854, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [70, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4167175292969}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [71, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4147644042969}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.84517059816774, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [72, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4128112792969}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.512036983718424, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [73, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4107360839844}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 76, "power": 121.342}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [74, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4085388183594}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 37.8865406947121, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [75, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4062194824219}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [76, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.40386962890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.11664097539493, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [77, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.4015808105469}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 78, "power": 283.827}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [78, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.3990783691406}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.22254067513172, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [79, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.3963317871094}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 31.92621016364475, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [80, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.39337158203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [81, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 42.22786114018701, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [82, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.3876647949219}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 78, "power": 274.432}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [83, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.38421630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.730715629739734, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [84, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.381103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 36.120471910127705, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [85, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.37744140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [86, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.3735046386719}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.69360569573131, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 78, "power": 281.407}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [87, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.36956787109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [88, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.3650207519531}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.11270386926677, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [89, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.36029052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 33.29918068770727, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [90, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.3553466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [91, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.3497314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 78, "power": 142.116}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.90459676620552, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [92, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.34381103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.53086839718483, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [93, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.3377685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [94, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.3302917480469}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 37.969881187928614, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [95, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.32183837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.94, "temperature": 78, "power": 203.124}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [96, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.3136901855469}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.05122658993147, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [97, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.3064270019531}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.406025870569266, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [98, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.30010986328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [99, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.2879943847656}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 38.13451305722688, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 79, "power": 241.535}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [100, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.28253173828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [101, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.27197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 41.708231471785176, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [102, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.2642517089844}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [103, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.25408935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.47671511193946, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [104, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.24456787109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.82, "temperature": 78, "power": 354.791}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 38.97211604164333, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [105, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.2336120605469}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [106, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.2249755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 37.81424912080379, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [107, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.212646484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [108, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.2038879394531}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 42.0980645550324, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 78, "power": 246.402}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [109, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.19232177734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [110, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.1824035644531}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.88736886730239, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [111, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 328.1700439453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 36.231083708737366, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 78, "power": 249.297}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712079066.8635077, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/t5.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/t5.D0.data
new file mode 100644
index 000000000..e1c189b27
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/t5.D0.data
@@ -0,0 +1,590 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "T5", "--batch-size": 16}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 2.0, "name": "t5", "tag": ["t5", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 104.497, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078035.439823, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712078035.4563587}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.970378875732422}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.800580024719238}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.727293014526367}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.682456970214844}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 73, "power": 301.633}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.654926300048828}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.628446578979492}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.61573600769043}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.587346076965332}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.556131362915039}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.513788223266602}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.457639694213867}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.321690559387207}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.367353439331055}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.313187599182129}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 73, "power": 300.274}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.28883171081543}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.212865829467773}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.028278350830078}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.84654426574707}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.015433311462402}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.654990196228027}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.513398761524286, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.576047897338867}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.431913375854492}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.35800552368164}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.81}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.161618009236065, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.306499481201172}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 9.0736665725708}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 8.939047813415527}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 8.910099029541016}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.8499263906401, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 8.83672046661377}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 8.653719902038574}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 8.481781959533691}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.3078586854502, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 8.48547649383545}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 8.421192169189453}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 75, "power": 306.632}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 8.146132469177246}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.73386027344551, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 8.017644882202148}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.908407211303711}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.934696197509766}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.747555732727051}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.01896724494396, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.655747413635254}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.503643035888672}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.347489356994629}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.346884981362464, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.31602668762207}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 75, "power": 226.302}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.231689929962158}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05312442779541}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 48.297648020757606, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9287261962890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.817322731018066}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.710197448730469}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 48.77194944243633, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.60150671005249}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.518242359161377}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.411893844604492}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.325069427490234}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.702023231180064, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.233058929443359}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 75, "power": 232.549}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.172609329223633}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.097485542297363}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.47617582463295, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.041872501373291}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.932075023651123}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.845073699951172}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.08195618693227, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.775830268859863}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.705239295959473}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.633525848388672}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.575502395629883}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.66782433496777, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 75, "power": 324.204}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.5167365074157715}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.423802852630615}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.354887962341309}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.27502777648228, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.272907257080078}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.22577428817749}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.148532390594482}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.302127354542854, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.052487373352051}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 5.000744819641113}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.897304534912109}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 323.043}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.836282253265381}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.4645535736731, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.77700138092041}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.716606140136719}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.660195827484131}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.298838210354056, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.5612311363220215}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.502846717834473}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.482702732086182}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.21506665591461, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.492550373077393}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.360192775726318}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 76, "power": 292.325}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.2910284996032715}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.2590203285217285}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.78574846333013, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.185894012451172}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.096889972686768}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 4.042828559875488}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.32153184709173, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.9600980281829834}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.8612828254699707}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.813199996948242}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.48145742501573, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.774353265762329}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.680110216140747}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 285.975}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.641136646270752}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 49.08007362170554, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.5967588424682617}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.522066354751587}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.439159870147705}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.3564889430999756}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.58709495158289, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.3717949390411377}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.323021650314331}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.2349772453308105}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.32145139087524, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.192401647567749}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 76, "power": 230.536}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.1241087913513184}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 3.0823092460632324}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.79780897923113, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.988307476043701}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.9341442584991455}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.8254554271698}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.76162052154541}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.72196650215915, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.763589859008789}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.7407455444335938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.672053337097168}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 42.68777841024662, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.99, "temperature": 76, "power": 304.881}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.585357189178467}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.5391626358032227}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.4440035820007324}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 48.65403305570211, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.3648884296417236}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.2605745792388916}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.2033910751342773}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.074898776610475, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.1607017517089844}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.115994691848755}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.066122055053711}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 284.816}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.993964672088623}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.32041072510573, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 2.0019028186798096}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.9345535039901733}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.8910869359970093}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 42.86588558414641, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.861147165298462}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.8138902187347412}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.7461824417114258}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 49.112481089674006, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.6692562103271484}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.625261664390564}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.5834534168243408}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 298.731}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.0665671794488, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.5272022485733032}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.459859013557434}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.4157166481018066}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.3812469244003296}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.158964098619705, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.3119561672210693}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.2564345598220825}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.2277307510375977}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 42.989731441401226, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.1866565942764282}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.1411606073379517}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 77, "power": 300.815}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.083279013633728}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 49.16440041655849, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.0341026782989502}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.9987545013427734}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.9641054272651672}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.63416494507367, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.9365309476852417}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.8940635919570923}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.8575931787490845}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.8317781686782837}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.94255042174156, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.7994922995567322}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 304.949}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.7648830413818359}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.7411973476409912}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.168463253269245, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.7056658267974854}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6834872364997864}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6573166847229004}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 48.28341553759186, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6341385245323181}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6199974417686462}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.5888203978538513}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 48.530540233582144, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.5731779932975769}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 242.951}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.5541250109672546}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.536582350730896}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.5135989785194397}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.823796543761894, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.4957291781902313}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.47861233353614807}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.47117674350738525}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.206086864876305, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.4504289925098419}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.43357139825820923}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.4255506992340088}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.78724376210535, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.41500476002693176}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 76, "power": 309.84}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.4001453220844269}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.3856187164783478}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.37681058049201965}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.42934239142913, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.3642743229866028}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.3536361753940582}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.34521564841270447}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.34800795854977, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.3393672704696655}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.3293442130088806}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.32399898767471313}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 311.488}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.0730700556139, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.3113195300102234}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.3049860894680023}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.2919404208660126}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.28530269861221313}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.52107161537166, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.2786383628845215}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.27610689401626587}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.2684989273548126}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 42.92595580884994, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.2630639374256134}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.2509020268917084}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 77, "power": 303.596}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [180, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.24854587018489838}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 48.96482064926789, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [181, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.24656544625759125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [182, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.23619654774665833}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [183, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.23337531089782715}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.993269944999156, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [184, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.22808784246444702}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [185, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.22517065703868866}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [186, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.21650603413581848}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [187, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.21727493405342102}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.498195881220965, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [188, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.21555562317371368}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 77, "power": 285.4}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [189, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.20680122077465057}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [190, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.20380568504333496}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 42.982066780323784, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [191, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.2012479454278946}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [192, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.1972467303276062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [193, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.1940830945968628}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 48.91166107588888, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [194, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.1896078884601593}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [195, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.18643631041049957}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [196, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.18215446174144745}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 46.89319703333237, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [197, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.17853213846683502}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [198, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.17645688354969025}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 76, "power": 288.788}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [199, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.1725698709487915}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [200, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.16997073590755463}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.55208540216636, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [201, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.16654980182647705}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [202, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.1624709963798523}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [203, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.16435472667217255}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 42.85558406180044, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [204, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.16150528192520142}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [205, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.15788640081882477}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [206, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.15364141762256622}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 49.054422957401265, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [207, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.15300226211547852}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 77, "power": 301.966}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [208, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.1467810720205307}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [209, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.14508351683616638}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.017092172493605, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [210, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.14274908602237701}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [211, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.14380408823490143}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [212, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.14112578332424164}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [213, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.1418134719133377}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 47.27492552184506, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [214, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.13803264498710632}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [215, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.13522985577583313}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [216, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.13362157344818115}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.10779637223394, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 77, "power": 279.531}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 77, "power": 297.292}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712078110.2958682, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/tf32.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/tf32.D0.data
new file mode 100644
index 000000000..7b743b550
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/tf32.D0.data
@@ -0,0 +1,124 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32", "--tf32": true}, "weight": 0.0, "name": "tf32", "tag": ["tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 69, "power": 99.705, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077285.652197, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712077285.6617703}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 118.32754788274686, "units": "Tflops", "t": 1712077287.5160294}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 66, "power": 60.775}}, "t": 1712077286.950776}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 0, "temperature": 70, "power": 327.388}}, "t": 1712077287.4610522}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 135.35934213074535, "units": "Tflops", "t": 1712077287.6792314}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 134.88899180073525, "units": "Tflops", "t": 1712077287.842325}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 134.8939237565598, "units": "Tflops", "t": 1712077288.0054052}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 306.901}}, "t": 1712077287.9707563}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.95063700383807, "units": "Tflops", "t": 1712077288.1696808}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 129.64735369900728, "units": "Tflops", "t": 1712077288.3393598}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 134.00863381296466, "units": "Tflops", "t": 1712077288.5035186}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 301.94}}, "t": 1712077288.4834569}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.43246021788087, "units": "Tflops", "t": 1712077288.668445}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.787619985593, "units": "Tflops", "t": 1712077288.8328652}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.66566579938606, "units": "Tflops", "t": 1712077288.9974337}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 294.924}}, "t": 1712077288.9936674}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.46489777251216, "units": "Tflops", "t": 1712077289.1623187}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 134.11288171634845, "units": "Tflops", "t": 1712077289.3263476}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.35490575102946, "units": "Tflops", "t": 1712077289.4913075}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.88958056283872, "units": "Tflops", "t": 1712077289.655615}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 288.197}}, "t": 1712077289.5035207}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.55959745279375, "units": "Tflops", "t": 1712077289.820384}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.61919178143685, "units": "Tflops", "t": 1712077289.9850082}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 134.07681503516815, "units": "Tflops", "t": 1712077290.1490693}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 294.539}}, "t": 1712077290.0134768}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.46412526650184, "units": "Tflops", "t": 1712077290.3139725}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.46258028130885, "units": "Tflops", "t": 1712077290.4787962}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 134.0678501148285, "units": "Tflops", "t": 1712077290.642876}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 295.798}}, "t": 1712077290.5233986}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.79984705472003, "units": "Tflops", "t": 1712077290.8073256}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.39097086508684, "units": "Tflops", "t": 1712077290.9722438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2784043196295, "units": "Tflops", "t": 1712077291.1372902}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 295.909}}, "t": 1712077291.0319536}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.23142934316905, "units": "Tflops", "t": 1712077291.3024626}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.28803442913426, "units": "Tflops", "t": 1712077291.4674945}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.49290715438087, "units": "Tflops", "t": 1712077291.6322744}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 296.591}}, "t": 1712077291.541029}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2689681623087, "units": "Tflops", "t": 1712077291.7973952}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.29015323998885, "units": "Tflops", "t": 1712077291.9624372}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 134.25949242926333, "units": "Tflops", "t": 1712077292.1262753}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.483}}, "t": 1712077292.049293}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.29920709977563, "units": "Tflops", "t": 1712077292.2913465}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.24855982153497, "units": "Tflops", "t": 1712077292.4564307}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.4604559850726, "units": "Tflops", "t": 1712077292.621264}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 295.898}}, "t": 1712077292.5587559}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.21103249702153, "units": "Tflops", "t": 1712077292.7864578}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.18891090514808, "units": "Tflops", "t": 1712077292.9516141}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.19852808572966, "units": "Tflops", "t": 1712077293.116764}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.304}}, "t": 1712077293.0668132}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2418225910361, "units": "Tflops", "t": 1712077293.281912}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.23008218901032, "units": "Tflops", "t": 1712077293.4470382}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.23393127317235, "units": "Tflops", "t": 1712077293.6121461}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 295.324}}, "t": 1712077293.577403}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.22007561093127, "units": "Tflops", "t": 1712077293.7773101}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.22238468784028, "units": "Tflops", "t": 1712077293.9424372}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.24913733013733, "units": "Tflops", "t": 1712077294.1075234}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.676}}, "t": 1712077294.0850124}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2308519880508, "units": "Tflops", "t": 1712077294.27268}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2412451458368, "units": "Tflops", "t": 1712077294.4377837}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.26184378600908, "units": "Tflops", "t": 1712077294.6028519}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.978}}, "t": 1712077294.5958502}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.27281949370183, "units": "Tflops", "t": 1712077294.7679858}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.25202494823964, "units": "Tflops", "t": 1712077294.9330666}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.23393127317235, "units": "Tflops", "t": 1712077295.0981672}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.25106239496628, "units": "Tflops", "t": 1712077295.2632587}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.17}}, "t": 1712077295.1029696}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.22835017369363, "units": "Tflops", "t": 1712077295.428415}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.1900648934909, "units": "Tflops", "t": 1712077295.5935786}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.20910858862644, "units": "Tflops", "t": 1712077295.7587082}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.854}}, "t": 1712077295.6141336}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.16718119210032, "units": "Tflops", "t": 1712077295.9239538}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.22873506209447, "units": "Tflops", "t": 1712077296.089061}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.21314886044416, "units": "Tflops", "t": 1712077296.2541945}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.372}}, "t": 1712077296.1212466}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.18141046831494, "units": "Tflops", "t": 1712077296.4194238}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.18852624681085, "units": "Tflops", "t": 1712077296.584579}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2208452943404, "units": "Tflops", "t": 1712077296.7497036}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.359}}, "t": 1712077296.6309273}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.27204920961543, "units": "Tflops", "t": 1712077296.9148002}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.21776661406503, "units": "Tflops", "t": 1712077297.079921}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.23008218901032, "units": "Tflops", "t": 1712077297.2450335}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.646}}, "t": 1712077297.1380339}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2437474445157, "units": "Tflops", "t": 1712077297.4101856}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.26781280629592, "units": "Tflops", "t": 1712077297.5752518}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2466348290137, "units": "Tflops", "t": 1712077297.7403445}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 295.528}}, "t": 1712077297.6469982}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2198831914686, "units": "Tflops", "t": 1712077297.9055262}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.24528736734194, "units": "Tflops", "t": 1712077298.070622}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.25857067930923, "units": "Tflops", "t": 1712077298.2357001}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 295.815}}, "t": 1712077298.1541488}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2225771145292, "units": "Tflops", "t": 1712077298.400884}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.25009985559896, "units": "Tflops", "t": 1712077298.5659637}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.26685002492098, "units": "Tflops", "t": 1712077298.7310297}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 296.099}}, "t": 1712077298.663185}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.21391846381613, "units": "Tflops", "t": 1712077298.896213}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.24855982153497, "units": "Tflops", "t": 1712077299.0613177}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2266182034094, "units": "Tflops", "t": 1712077299.2264338}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 296.978}}, "t": 1712077299.1703143}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.22565601715377, "units": "Tflops", "t": 1712077299.3915896}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2547201713875, "units": "Tflops", "t": 1712077299.5566738}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2787894972917, "units": "Tflops", "t": 1712077299.7217174}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 297.552}}, "t": 1712077299.6812122}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.24740481934785, "units": "Tflops", "t": 1712077299.88685}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.21853627079372, "units": "Tflops", "t": 1712077300.0519881}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.24875232384622, "units": "Tflops", "t": 1712077300.2170668}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 298.442}}, "t": 1712077300.188352}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.23720316957952, "units": "Tflops", "t": 1712077300.3822114}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.18833391847545, "units": "Tflops", "t": 1712077300.5473757}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.20814665526834, "units": "Tflops", "t": 1712077300.7125075}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 298.138}}, "t": 1712077300.6968179}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2035295685289, "units": "Tflops", "t": 1712077300.8777108}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.23701070063657, "units": "Tflops", "t": 1712077301.0428088}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.23662576441882, "units": "Tflops", "t": 1712077301.2079144}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 299.509}}, "t": 1712077301.203931}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.27320463908413, "units": "Tflops", "t": 1712077301.373013}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.27994504363647, "units": "Tflops", "t": 1712077301.5380626}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2541426143916, "units": "Tflops", "t": 1712077301.703145}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.2866861300266, "units": "Tflops", "t": 1712077301.868178}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 298.825}}, "t": 1712077301.711091}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.25144741460687, "units": "Tflops", "t": 1712077302.0333116}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 133.3003630001962, "units": "Tflops", "t": 1712077302.1983297}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712077302.660153, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/whisper.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/whisper.D0.data
new file mode 100644
index 000000000..458e3853a
--- /dev/null
+++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/whisper.D0.data
@@ -0,0 +1,684 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Whisper", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["audio", "huggingface"], "weight": 1.0, "name": "whisper", "tag": ["whisper", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 105.874, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078191.204879, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712078191.221243}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1360.375, 81920.0], "load": 0, "temperature": 69, "power": 94.767}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1360.375, 81920.0], "load": 0, "temperature": 68, "power": 93.365}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6992721557617188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 1.4653310775756836}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.970947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6924057006835938}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 72, "power": 301.855}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.7043609619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6922607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6987991333007812}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6994171142578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6975479125976562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6967544555664062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6952743530273438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6956329345703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 73, "power": 144.457}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6933135986328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6947784423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6918106079101562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6918716430664062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6938247680664062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6923065185546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.69122314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6926803588867188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6926116943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 234.2913916211188, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6917190551757812}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 73, "power": 330.102}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.69244384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 213.04746175148657, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6915664672851562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912841796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6919631958007812}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6917724609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.72370533201257, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6915283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691802978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6913375854492188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 216.85477238350038, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691558837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 74, "power": 338.011}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6914215087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 216.6530419972232, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691375732421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.69146728515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912612915039062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 216.90345245547238, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6913833618164062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912765502929688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912918090820312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.8453820003199, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 74, "power": 292.171}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 216.11544837275375, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912307739257812}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 217.35952419259772, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.98, "temperature": 75, "power": 298.153}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.56933978331955, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.5706106402332, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912307739257812}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 216.1818606460464, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 74, "power": 332.135}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 216.31044024012843, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 216.40257392467467, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.95571962941324, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 75, "power": 280.408}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.70580108910144, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.36098047474337, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.20621401150512, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 76, "power": 320.074}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 213.72205777988853, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 212.87064656556868, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 75, "power": 323.693}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 213.08866226770925, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 214.04680735114928, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 214.0307817080435, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 76, "power": 272.571}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 214.37628234333806, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 214.3267088734617, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 214.32214067767083, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 76, "power": 157.032}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.30861548038735, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.16081136691471, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.16926617279907, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 76, "power": 257.02}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 214.47607760304805, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910781860351562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 214.66688120832552, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910781860351562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910858154296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 77, "power": 336.298}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 214.50669730170878, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910552978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910552978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 213.16736779342986, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6909866333007812}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 213.19906234053013, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6909027099609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6908340454101562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.93, "temperature": 75, "power": 339.327}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6919326782226562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 213.9279660816675, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6914596557617188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910934448242188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6914291381835938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 214.10770201189231, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.69134521484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 214.52691945922763, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910018920898438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912841796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.98, "temperature": 77, "power": 307.795}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691253662109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 213.18414582451112, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 214.57904893467582, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910324096679688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910324096679688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910858154296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6909027099609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 214.22158187895425, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6909942626953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 76, "power": 283.237}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [180, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6908721923828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [181, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6908798217773438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [182, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6908721923828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 213.49812755637112, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [183, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6906585693359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [184, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6905517578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [185, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6905288696289062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [186, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6909103393554688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 212.6168404494011, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [187, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6955337524414062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [188, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.7002067565917969}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [189, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.697906494140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [190, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6913986206054688}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 76, "power": 281.872}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 212.1268088992443, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [191, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6951828002929688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [192, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6938323974609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [193, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6914520263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [194, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6948165893554688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 213.14551093007182, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [195, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912918090820312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [196, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6923904418945312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [197, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.693206787109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [198, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6908798217773438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 214.4679434605734, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [199, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6921310424804688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [200, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6922607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [201, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6906814575195312}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 76, "power": 273.272}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [202, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6915359497070312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 213.3693376309808, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [203, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6916046142578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [204, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6903610229492188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [205, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6906814575195312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [206, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.69073486328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 213.676335311893, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [207, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6897354125976562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [208, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6897735595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [209, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6895599365234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [210, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6880340576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 213.74458646748022, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [211, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6874923706054688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [212, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6850967407226562}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 77, "power": 286.651}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [213, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6821975708007812}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [214, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.67584228515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.33226381498915, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [215, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6980819702148438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [216, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.832855224609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [217, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.7104606628417969}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [218, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.708587646484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 213.93003322162104, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [219, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6919479370117188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [220, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6960067749023438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [221, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6996231079101562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [222, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6961898803710938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.1493433197456, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [223, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6914520263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 77, "power": 278.47}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [224, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.69287109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [225, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6962814331054688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [226, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6955718994140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.7189868666101, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [227, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6918563842773438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [228, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691741943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [229, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.693878173828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [230, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6942672729492188}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.4194120149196, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [231, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6923370361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [232, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [233, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.69189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [234, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6931381225585938}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 77, "power": 295.422}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.5700406497057, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [235, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6926040649414062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [236, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6917266845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [237, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [238, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6917572021484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 216.9245137781287, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [239, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.69232177734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [240, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6920166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [241, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [242, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691253662109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.7145527116028, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [243, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.69140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [244, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.69171142578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [245, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6917266845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 77, "power": 281.275}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [246, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6913909912109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.4835698946194, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [247, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [248, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [249, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6914443969726562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [250, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6915283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 213.8765232032213, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [251, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6913299560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [252, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [253, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [254, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.67232322291449, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [255, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6914215087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [256, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 76, "power": 307.366}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [257, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [258, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 215.40894191513954, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [259, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [260, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [261, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [262, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 0.6912612915039062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 214.38790475698414, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.96, "temperature": 77, "power": 309.046}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712078273.8594427, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/README.md b/paice-v1-11-g010135f/NVIDIA_A10-24Q/README.md
new file mode 100644
index 000000000..48fc2f8e1
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/README.md
@@ -0,0 +1,38 @@
+```
+=================
+Benchmark results
+=================
+ fail n perf sem% std% peak_memory score weight
+bert-fp16 4 4 NaN NaN NaN 24000 NaN 0.00
+bert-fp32 4 4 NaN NaN NaN 23304 NaN 0.00
+bert-tf32 4 4 NaN NaN NaN 23304 NaN 0.00
+bert-tf32-fp16 4 4 NaN NaN NaN 24000 NaN 3.00
+bf16 0 4 91.87 0.1% 1.4% 3098 183.777391 0.00
+convnext_large-fp16 4 4 NaN NaN NaN 24394 NaN 0.00
+convnext_large-fp32 4 4 NaN NaN NaN 24430 NaN 0.00
+convnext_large-tf32 4 4 NaN NaN NaN 24430 NaN 0.00
+convnext_large-tf32-fp16 4 4 NaN NaN NaN 24470 NaN 3.00
+davit_large 4 4 NaN NaN NaN 24438 NaN 1.00
+davit_large-multi 2 2 NaN NaN NaN 24366 NaN 5.00
+dlrm 0 2 376081.29 0.1% 1.4% 5996 376081.290012 1.00
+focalnet 0 4 146.78 1.0% 15.0% 24468 293.712272 2.00
+fp16 0 4 92.92 0.1% 1.1% 3098 185.826273 0.00
+fp32 0 4 15.61 0.1% 1.4% 3476 31.219423 0.00
+llama 4 4 NaN NaN NaN -1 NaN 1.00
+reformer 4 4 NaN NaN NaN 23556 NaN 1.00
+regnet_y_128gf 4 4 NaN NaN NaN 24450 NaN 2.00
+resnet152 4 4 NaN NaN NaN 24458 NaN 1.00
+resnet152-multi 2 2 NaN NaN NaN 24470 NaN 5.00
+resnet50 0 4 546.80 0.5% 8.1% 5838 1094.496142 1.00
+rwkv 4 4 NaN NaN NaN 3976 NaN 1.00
+stargan 4 4 NaN NaN NaN 24384 NaN 1.00
+super-slomo 4 4 NaN NaN NaN 24458 NaN 1.00
+t5 4 4 NaN NaN NaN 24098 NaN 2.00
+tf32 0 4 44.61 0.1% 1.0% 3476 89.225443 0.00
+whisper 4 4 NaN NaN NaN 23124 NaN 1.00
+
+Scores
+------
+Failure rate: 74.51% (FAIL)
+Score: 2.65
+```
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/badge.svg b/paice-v1-11-g010135f/NVIDIA_A10-24Q/badge.svg
new file mode 100644
index 000000000..726e6f405
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/badge.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp16.D0.data
new file mode 100644
index 000000000..96b27a461
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp16.D0.data
@@ -0,0 +1,38 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp16", "tag": ["bert-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255816.478905, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255818.8367445}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23999.8125, 24512.0], "load": 0.17, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255823.3500848, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp16.D1.data
new file mode 100644
index 000000000..7d76cc6e2
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp16.D1.data
@@ -0,0 +1,38 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp16", "tag": ["bert-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255818.821121, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255818.8442266}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23999.8125, 24512.0], "load": 0.15, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255823.0727057, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp32.D0.data
new file mode 100644
index 000000000..ff3448ff8
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp32.D0.data
@@ -0,0 +1,56 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp32", "tag": ["bert-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255807.772349, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255810.1287518}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23323.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255814.0971403, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp32.D1.data
new file mode 100644
index 000000000..3ff463818
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp32.D1.data
@@ -0,0 +1,56 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp32", "tag": ["bert-fp32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0.02, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0.12, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255810.113409, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255810.1362917}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23323.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255813.9872866, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32-fp16.D0.data
new file mode 100644
index 000000000..bf98cee01
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32-fp16.D0.data
@@ -0,0 +1,38 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 3.0, "name": "bert-tf32-fp16", "tag": ["bert-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255834.408576, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255836.796411}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23999.8125, 24512.0], "load": 0.19, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255841.190781, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32-fp16.D1.data
new file mode 100644
index 000000000..ab237f264
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32-fp16.D1.data
@@ -0,0 +1,38 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 3.0, "name": "bert-tf32-fp16", "tag": ["bert-tf32-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0.04, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0.11, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255836.779172, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255836.8041034}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23999.8125, 24512.0], "load": 0.15, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255841.1329126, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32.D0.data
new file mode 100644
index 000000000..ceb0c2436
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32.D0.data
@@ -0,0 +1,56 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-tf32", "tag": ["bert-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255825.712579, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255828.0542758}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23323.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255831.7830172, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32.D1.data
new file mode 100644
index 000000000..43fd61b5f
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32.D1.data
@@ -0,0 +1,56 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-tf32", "tag": ["bert-tf32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255828.038916, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255828.0615072}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23323.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255832.0014517, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bf16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bf16.D0.data
new file mode 100644
index 000000000..164ca0573
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bf16.D0.data
@@ -0,0 +1,137 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "bf16"}, "weight": 0.0, "name": "bf16", "tag": ["bf16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0.04, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255466.590297, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712255468.905043}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 84.31733446984312, "units": "Tflops", "t": 1712255470.4362257}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255470.0362403}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.50631369394608, "units": "Tflops", "t": 1712255470.6695824}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.09, "temperature": null, "power": null}}, "t": 1712255470.5420935}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.51009040606831, "units": "Tflops", "t": 1712255470.9023838}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.24605109321959, "units": "Tflops", "t": 1712255471.146118}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.22, "temperature": null, "power": null}}, "t": 1712255471.0473926}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.38217852944457, "units": "Tflops", "t": 1712255471.384261}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.51280207785311, "units": "Tflops", "t": 1712255471.6169817}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.42, "temperature": null, "power": null}}, "t": 1712255471.5526612}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.06555294663043, "units": "Tflops", "t": 1712255471.8508568}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.34430091058142, "units": "Tflops", "t": 1712255472.0916586}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.55, "temperature": null, "power": null}}, "t": 1712255472.058135}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.02050884954336, "units": "Tflops", "t": 1712255472.3309634}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.65265169436914, "units": "Tflops", "t": 1712255472.565822}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.75, "temperature": null, "power": null}}, "t": 1712255472.5634499}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.99997999260887, "units": "Tflops", "t": 1712255472.799877}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.55228533466969, "units": "Tflops", "t": 1712255473.0375352}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.4306846651339, "units": "Tflops", "t": 1712255473.278101}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}, "t": 1712255473.0687656}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.46464980649935, "units": "Tflops", "t": 1712255473.5134919}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.83167377290025, "units": "Tflops", "t": 1712255473.7478988}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255473.574011}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.20449154472807, "units": "Tflops", "t": 1712255473.986491}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.6424433242199, "units": "Tflops", "t": 1712255474.2264953}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255474.0793128}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.22865659169432, "units": "Tflops", "t": 1712255474.4625025}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.54920935043356, "units": "Tflops", "t": 1712255474.6976256}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255474.584662}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.01711047295095, "units": "Tflops", "t": 1712255474.93414}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.86543185225207, "units": "Tflops", "t": 1712255475.1735651}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255475.0900052}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.90645601656371, "units": "Tflops", "t": 1712255475.4103608}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.47573242528249, "units": "Tflops", "t": 1712255475.6456726}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255475.5955245}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.10687665784504, "units": "Tflops", "t": 1712255475.88196}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.83460716580517, "units": "Tflops", "t": 1712255476.1214762}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255476.100936}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.48787705469407, "units": "Tflops", "t": 1712255476.359656}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.38449753163003, "units": "Tflops", "t": 1712255476.5951912}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.5363141157061, "units": "Tflops", "t": 1712255476.832881}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255476.6061387}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.20412284511447, "units": "Tflops", "t": 1712255477.0714762}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.38930395641036, "units": "Tflops", "t": 1712255477.309551}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255477.1116827}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.35415696038632, "units": "Tflops", "t": 1712255477.5452068}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.48750608524567, "units": "Tflops", "t": 1712255477.783023}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255477.6170638}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.37745969583447, "units": "Tflops", "t": 1712255478.0211692}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.07360437054737, "units": "Tflops", "t": 1712255478.2600505}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255478.1223423}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.39140016499302, "units": "Tflops", "t": 1712255478.495614}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.41948353044396, "units": "Tflops", "t": 1712255478.7336192}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255478.6275706}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.40976096318262, "units": "Tflops", "t": 1712255478.9718282}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.42605899732119, "units": "Tflops", "t": 1712255479.2098012}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255479.1328607}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.79923893059077, "units": "Tflops", "t": 1712255479.4468596}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.4768419861293, "units": "Tflops", "t": 1712255479.6847122}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255479.638394}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.42078002732306, "units": "Tflops", "t": 1712255479.9227474}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.28826273182227, "units": "Tflops", "t": 1712255480.1610806}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255480.1437087}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.51904911602156, "units": "Tflops", "t": 1712255480.398864}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.42642947400104, "units": "Tflops", "t": 1712255480.636837}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.5540499353741, "units": "Tflops", "t": 1712255480.874483}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255480.6490898}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.23003116730106, "units": "Tflops", "t": 1712255481.1130168}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.41716844791688, "units": "Tflops", "t": 1712255481.351014}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255481.1544216}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.50726434922842, "units": "Tflops", "t": 1712255481.5888412}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.56677562725463, "units": "Tflops", "t": 1712255481.8264527}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255481.659704}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.11130444494044, "units": "Tflops", "t": 1712255482.0652885}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.28613889544037, "units": "Tflops", "t": 1712255482.3036225}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255482.165049}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.42522543565042, "units": "Tflops", "t": 1712255482.5416481}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.4191131094453, "units": "Tflops", "t": 1712255482.7796466}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255482.6703403}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.50819217473018, "units": "Tflops", "t": 1712255483.0174541}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.41050165821994, "units": "Tflops", "t": 1712255483.255465}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255483.1756403}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.42300267803238, "units": "Tflops", "t": 1712255483.4934871}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.41679804547584, "units": "Tflops", "t": 1712255483.731491}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255483.6809924}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.4136497446005, "units": "Tflops", "t": 1712255483.9695513}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.2733056166573, "units": "Tflops", "t": 1712255484.208019}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255484.186317}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.4243918890148, "units": "Tflops", "t": 1712255484.4460564}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.41957613615757, "units": "Tflops", "t": 1712255484.6840491}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.24995636086726, "units": "Tflops", "t": 1712255484.9224758}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255484.6917884}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.41855748351477, "units": "Tflops", "t": 1712255485.1605139}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.42642947400104, "units": "Tflops", "t": 1712255485.3984852}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255485.1970625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.41809446700944, "units": "Tflops", "t": 1712255485.636522}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.68714185141376, "units": "Tflops", "t": 1712255485.87906}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255485.7023659}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.43245013634089, "units": "Tflops", "t": 1712255486.1170676}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.37884753803, "units": "Tflops", "t": 1712255486.3551602}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255486.2078986}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.42661471345474, "units": "Tflops", "t": 1712255486.593173}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.71853734428491, "units": "Tflops", "t": 1712255486.8329842}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255486.7132907}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.41976134814156, "units": "Tflops", "t": 1712255487.0710182}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.40790927753591, "units": "Tflops", "t": 1712255487.309043}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255487.219902}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.35821924809419, "units": "Tflops", "t": 1712255487.5473814}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37805624982441, "units": "Tflops", "t": 1712255487.7880902}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255487.7251945}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.38828597097721, "units": "Tflops", "t": 1712255488.0262096}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.29962229886915, "units": "Tflops", "t": 1712255488.2645056}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255488.2305722}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.39892965477048, "units": "Tflops", "t": 1712255488.5025978}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37850890371786, "units": "Tflops", "t": 1712255488.7433012}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255488.7359383}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.57047541511608, "units": "Tflops", "t": 1712255488.9835434}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.02546673732121, "units": "Tflops", "t": 1712255489.2225525}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.42689257402746, "units": "Tflops", "t": 1712255489.4605222}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255489.2412555}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 89.79489112557708, "units": "Tflops", "t": 1712255489.7055163}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.38541722362895, "units": "Tflops", "t": 1712255489.9435904}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255489.746608}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.29066370805562, "units": "Tflops", "t": 1712255490.181972}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.28733931268843, "units": "Tflops", "t": 1712255490.4202974}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255490.2520156}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.55084263334564, "units": "Tflops", "t": 1712255490.66059}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.75950723515646, "units": "Tflops", "t": 1712255490.9004068}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255490.7572982}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.17868969080234, "units": "Tflops", "t": 1712255491.13908}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.32734029160343, "units": "Tflops", "t": 1712255491.3773057}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255491.262598}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.38249245136332, "units": "Tflops", "t": 1712255491.6180596}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712255492.1812196, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bf16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bf16.D1.data
new file mode 100644
index 000000000..1884083d8
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bf16.D1.data
@@ -0,0 +1,137 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "bf16"}, "weight": 0.0, "name": "bf16", "tag": ["bf16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0.04, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255468.896415, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712255468.905882}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 84.43713323423174, "units": "Tflops", "t": 1712255470.4139261}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255470.0124965}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.42324224043428, "units": "Tflops", "t": 1712255470.6449673}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.1, "temperature": null, "power": null}}, "t": 1712255470.5186343}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.7306303225906, "units": "Tflops", "t": 1712255470.8772168}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.63171418855094, "units": "Tflops", "t": 1712255471.1199105}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.23, "temperature": null, "power": null}}, "t": 1712255471.0245}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.7691908690281, "units": "Tflops", "t": 1712255471.3547773}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.44853859868697, "units": "Tflops", "t": 1712255471.5876806}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.43, "temperature": null, "power": null}}, "t": 1712255471.5298777}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.41637163342693, "units": "Tflops", "t": 1712255471.8232083}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.82948696691938, "units": "Tflops", "t": 1712255472.0627613}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.56, "temperature": null, "power": null}}, "t": 1712255472.0355153}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.33889436923431, "units": "Tflops", "t": 1712255472.3015506}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.02460705920443, "units": "Tflops", "t": 1712255472.535485}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.49556624211912, "units": "Tflops", "t": 1712255472.76837}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.76, "temperature": null, "power": null}}, "t": 1712255472.5411787}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.96676329714575, "units": "Tflops", "t": 1712255473.0052345}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.89014312340933, "units": "Tflops", "t": 1712255473.2446036}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}, "t": 1712255473.0466676}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.5119350081745, "units": "Tflops", "t": 1712255473.4800181}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.5064105289479, "units": "Tflops", "t": 1712255473.7127585}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255473.552015}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.18994236717737, "units": "Tflops", "t": 1712255473.9488537}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.94217174195401, "units": "Tflops", "t": 1712255474.188145}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255474.0578268}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.89868918573929, "units": "Tflops", "t": 1712255474.4251978}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.57482560148696, "units": "Tflops", "t": 1712255474.658213}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255474.5634713}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.68556248566537, "units": "Tflops", "t": 1712255474.8958733}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.81422119192186, "units": "Tflops", "t": 1712255475.135553}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255475.069129}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.98841433932571, "units": "Tflops", "t": 1712255475.372179}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.0623872388584, "units": "Tflops", "t": 1712255475.6061213}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255475.574926}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.21716142154506, "units": "Tflops", "t": 1712255475.8421662}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.08472727980903, "units": "Tflops", "t": 1712255476.0810685}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255476.0806623}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.5115325006472, "units": "Tflops", "t": 1712255476.3190844}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.38544303752961, "units": "Tflops", "t": 1712255476.554632}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.0303391627988, "units": "Tflops", "t": 1712255476.7911675}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255476.5860624}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.01050288854016, "units": "Tflops", "t": 1712255477.030295}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.45079483481992, "units": "Tflops", "t": 1712255477.2682111}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255477.0919209}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.7488474673816, "units": "Tflops", "t": 1712255477.5054321}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.69089485624697, "units": "Tflops", "t": 1712255477.7404397}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255477.5977395}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.02142693660501, "units": "Tflops", "t": 1712255477.9797747}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.89490385802321, "units": "Tflops", "t": 1712255478.219234}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255478.1033907}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.84024569366447, "units": "Tflops", "t": 1712255478.4564245}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.85146113950891, "units": "Tflops", "t": 1712255478.6933143}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255478.6090999}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.20126552308362, "units": "Tflops", "t": 1712255478.9320936}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.17297837579811, "units": "Tflops", "t": 1712255479.1708422}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255479.114808}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.9674192411993, "units": "Tflops", "t": 1712255479.4076736}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.79027644667492, "units": "Tflops", "t": 1712255479.644727}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255479.6212878}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.48667141486742, "units": "Tflops", "t": 1712255479.882796}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.36931852814708, "units": "Tflops", "t": 1712255480.1210608}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.60116177848478, "units": "Tflops", "t": 1712255480.35859}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255480.1269372}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.23768658907109, "units": "Tflops", "t": 1712255480.5973053}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.11397220468167, "units": "Tflops", "t": 1712255480.8361886}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255480.6322792}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.36746846288202, "units": "Tflops", "t": 1712255481.0743773}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.58526662592641, "units": "Tflops", "t": 1712255481.312054}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255481.1378949}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.97591188752702, "units": "Tflops", "t": 1712255481.5515196}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.68714588622315, "units": "Tflops", "t": 1712255481.788829}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255481.6435814}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.61817842719907, "units": "Tflops", "t": 1712255482.0264785}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.00609728386748, "units": "Tflops", "t": 1712255482.2655454}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255482.1492035}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.44245543291528, "units": "Tflops", "t": 1712255482.5037355}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.62375901777159, "units": "Tflops", "t": 1712255482.7412024}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255482.6548126}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.52415368697116, "units": "Tflops", "t": 1712255482.9791477}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.30627309920374, "units": "Tflops", "t": 1712255483.2174418}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255483.160345}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.42476235233003, "units": "Tflops", "t": 1712255483.4554894}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.44894150436994, "units": "Tflops", "t": 1712255483.6934173}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255483.6661813}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.44643962612749, "units": "Tflops", "t": 1712255483.9314725}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.0283131735444, "units": "Tflops", "t": 1712255484.1705503}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.42031698854764, "units": "Tflops", "t": 1712255484.4085517}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255484.1717713}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.52118368626097, "units": "Tflops", "t": 1712255484.6463883}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.60199851664846, "units": "Tflops", "t": 1712255484.8839822}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255484.6774912}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.26582887699695, "units": "Tflops", "t": 1712255485.122456}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.03510860417994, "units": "Tflops", "t": 1712255485.3614542}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255485.1830454}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.58829763511311, "units": "Tflops", "t": 1712255485.601731}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.64739096922534, "units": "Tflops", "t": 1712255485.839205}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255485.6886952}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.98361691415816, "units": "Tflops", "t": 1712255486.078608}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.4473662319097, "units": "Tflops", "t": 1712255486.316537}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255486.1940565}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.98912129502428, "units": "Tflops", "t": 1712255486.5558977}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.23076898394027, "units": "Tflops", "t": 1712255486.7945006}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255486.6999109}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.23962370446485, "units": "Tflops", "t": 1712255487.0331292}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.3931910162418, "units": "Tflops", "t": 1712255487.2713127}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255487.2059104}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.13053417027704, "units": "Tflops", "t": 1712255487.5103962}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.91459290199205, "units": "Tflops", "t": 1712255487.7497797}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255487.7117453}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.42031698854764, "units": "Tflops", "t": 1712255487.9878573}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.33177671541954, "units": "Tflops", "t": 1712255488.226122}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255488.21736}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.59148090444579, "units": "Tflops", "t": 1712255488.4663923}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.2574305281935, "units": "Tflops", "t": 1712255488.704815}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.4161498483484, "units": "Tflops", "t": 1712255488.9428568}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255488.7229724}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.42318790375046, "units": "Tflops", "t": 1712255489.1809745}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.84795894099558, "units": "Tflops", "t": 1712255489.4204614}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255489.2285285}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.0632192864293, "units": "Tflops", "t": 1712255489.6594381}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.26389066079385, "units": "Tflops", "t": 1712255489.8978834}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255489.7343385}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.12556495529024, "units": "Tflops", "t": 1712255490.136759}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.56193048458238, "units": "Tflops", "t": 1712255490.3770025}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255490.2398126}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.5783854423225, "units": "Tflops", "t": 1712255490.6173174}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.15041639212926, "units": "Tflops", "t": 1712255490.8560798}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255490.7452235}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.94391315428545, "units": "Tflops", "t": 1712255491.0953827}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.77210667198766, "units": "Tflops", "t": 1712255491.335063}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255491.2509296}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.55529562794469, "units": "Tflops", "t": 1712255491.5754318}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712255492.0723476, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp16.D0.data
new file mode 100644
index 000000000..973c56c7d
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp16.D0.data
@@ -0,0 +1,70 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp16", "tag": ["convnext_large-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255763.64029, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255766.011682}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [10849.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24393.8125, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255771.5214307, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp16.D1.data
new file mode 100644
index 000000000..4937c37d4
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp16.D1.data
@@ -0,0 +1,70 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp16", "tag": ["convnext_large-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255765.997041, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255766.0182161}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5243.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24393.8125, 24512.0], "load": 0.04, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255771.6062105, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp32.D0.data
new file mode 100644
index 000000000..2db857c87
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp32.D0.data
@@ -0,0 +1,70 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp32", "tag": ["convnext_large-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0.02, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255753.452555, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255755.788694}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24429.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24427.8125, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255761.3059878, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp32.D1.data
new file mode 100644
index 000000000..56a006dda
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp32.D1.data
@@ -0,0 +1,70 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp32", "tag": ["convnext_large-fp32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255755.774073, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255755.7957416}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24429.8125, 24512.0], "load": 0.04, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24427.8125, 24512.0], "load": 0.1, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255761.234581, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32-fp16.D0.data
new file mode 100644
index 000000000..e7fa603e3
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32-fp16.D0.data
@@ -0,0 +1,70 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 3.0, "name": "convnext_large-tf32-fp16", "tag": ["convnext_large-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255784.292977, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255786.6583555}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24469.8125, 24512.0], "load": 0.04, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24393.8125, 24512.0], "load": 0.04, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255792.0367098, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32-fp16.D1.data
new file mode 100644
index 000000000..8ecf45fc9
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32-fp16.D1.data
@@ -0,0 +1,70 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 3.0, "name": "convnext_large-tf32-fp16", "tag": ["convnext_large-tf32-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255786.642294, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255786.6658125}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24469.8125, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24393.8125, 24512.0], "load": 0.09, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255792.1112833, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32.D0.data
new file mode 100644
index 000000000..bc84db6eb
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32.D0.data
@@ -0,0 +1,70 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-tf32", "tag": ["convnext_large-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255773.961569, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255776.3774183}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24429.8125, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24427.8125, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255781.8679147, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32.D1.data
new file mode 100644
index 000000000..57632998b
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32.D1.data
@@ -0,0 +1,70 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-tf32", "tag": ["convnext_large-tf32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255776.361131, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255776.3849685}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24429.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24427.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255781.9482388, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large-multi.0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large-multi.0.data
new file mode 100644
index 000000000..84c62211d
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large-multi.0.data
@@ -0,0 +1,228 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "multigpu", "transformer", "vision"], "weight": 5.0, "name": "davit_large-multi", "tag": ["davit_large-multi", "0"], "job-number": 0, "devices": ["0", "1"]}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255917.190164, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712255917.207126}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 0, total 2, device cuda:0.\n", "pipe": "stderr"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 1, total 2, device cuda:1.\n", "pipe": "stderr"}
+{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.01) calculated from base learning rate (0.01) and global batch size (256) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch DistributedDataParallel.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7083.75, 24512.0], "load": 0.07, "temperature": null, "power": null}, "1": {"memory": [7083.75, 24512.0], "load": 0.07, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 146.25 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"}
+{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"}
+{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 186, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 41, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.act(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 146.25 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24365.75, 24512.0], "load": 0.4, "temperature": null, "power": null}, "1": {"memory": [24365.75, 24512.0], "load": 0.39, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 146.25 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"}
+{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"}
+{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 186, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 41, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.act(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 146.25 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "line", "data": "[2024-04-04 18:38:48,214] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: 1) local_rank: 0 (pid: 34749) of binary: /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/python\n", "pipe": "stderr"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/torchrun\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py\", line 346, in wrapper\n", "pipe": "stderr"}
+{"event": "line", "data": " return f(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 806, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " run(args)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 797, in run\n", "pipe": "stderr"}
+{"event": "line", "data": " elastic_launch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 134, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " return launch_agent(self._config, self._entrypoint, list(args))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 264, in launch_agent\n", "pipe": "stderr"}
+{"event": "line", "data": " raise ChildFailedError(\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.distributed.elastic.multiprocessing.errors.ChildFailedError: \n", "pipe": "stderr"}
+{"event": "line", "data": "============================================================\n", "pipe": "stderr"}
+{"event": "line", "data": "voir FAILED\n", "pipe": "stderr"}
+{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"}
+{"event": "line", "data": "Failures:\n", "pipe": "stderr"}
+{"event": "line", "data": "[1]:\n", "pipe": "stderr"}
+{"event": "line", "data": " time : 2024-04-04_18:38:48\n", "pipe": "stderr"}
+{"event": "line", "data": " host : decentoriole.internal.cloudapp.net\n", "pipe": "stderr"}
+{"event": "line", "data": " rank : 1 (local_rank: 1)\n", "pipe": "stderr"}
+{"event": "line", "data": " exitcode : 1 (pid: 34750)\n", "pipe": "stderr"}
+{"event": "line", "data": " error_file: \n", "pipe": "stderr"}
+{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"}
+{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"}
+{"event": "line", "data": "Root Cause (first observed failure):\n", "pipe": "stderr"}
+{"event": "line", "data": "[0]:\n", "pipe": "stderr"}
+{"event": "line", "data": " time : 2024-04-04_18:38:48\n", "pipe": "stderr"}
+{"event": "line", "data": " host : decentoriole.internal.cloudapp.net\n", "pipe": "stderr"}
+{"event": "line", "data": " rank : 0 (local_rank: 0)\n", "pipe": "stderr"}
+{"event": "line", "data": " exitcode : 1 (pid: 34749)\n", "pipe": "stderr"}
+{"event": "line", "data": " error_file: \n", "pipe": "stderr"}
+{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"}
+{"event": "line", "data": "============================================================\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712255928.528487, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D0.data
new file mode 100644
index 000000000..87b4b7e48
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D0.data
@@ -0,0 +1,92 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "transformer", "vision"], "weight": 1.0, "name": "davit_large", "tag": ["davit_large", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255904.482029, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712255906.7814283}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6043.8125, 24512.0], "load": 0.1, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24437.8125, 24512.0], "load": 0.36, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 74.19 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 353, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 40, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.fc1(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 74.19 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712255914.8352365, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D1.data
new file mode 100644
index 000000000..b157704c7
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D1.data
@@ -0,0 +1,92 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "transformer", "vision"], "weight": 1.0, "name": "davit_large", "tag": ["davit_large", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255906.765655, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D1", "--checkpoint-hist", "1"], "time": 1712255906.7886434}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [6043.8125, 24512.0], "load": 0.11, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24437.8125, 24512.0], "load": 0.3, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 74.19 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 353, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 40, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.fc1(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 74.19 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D1", "--checkpoint-hist", "1"], "time": 1712255914.2563057, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/dlrm.0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/dlrm.0.data
new file mode 100644
index 000000000..a422f184b
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/dlrm.0.data
@@ -0,0 +1,266 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "dlrm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "tags": ["nlp", "rl"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm", "plan": {"method": "njobs", "n": 1}, "argv": {"--num-batches": 1000, "--data-generation": "random", "--arch-mlp-bot": "512-512-64", "--arch-mlp-top": "1024-1024-1024-1", "--arch-sparse-feature-size": 64, "--arch-embedding-size": "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup": 100, "--arch-interaction-op": "dot", "--numpy-rand-seed": "727", "--print-freq": 999999, "--mini-batch-size": 16384, "--test-mini-batch-size": 16384, "--test-num-workers": 0, "--use-gpu": true}, "weight": 1.0, "name": "dlrm", "tag": ["dlrm", "0"], "job-number": 0, "devices": ["0", "1"]}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712256080.036951, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712256080.0531104}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "line", "data": "Unable to import mlperf_logging, No module named 'mlperf_logging'\n", "pipe": "stdout"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:347: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "world size: 1, current rank: 0, local rank: 0\n", "pipe": "stdout"}
+{"event": "line", "data": "Using 2 GPU(s)...\n", "pipe": "stdout"}
+{"event": "line", "data": "time/loss/accuracy (if enabled):\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2661.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2661.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0887361615896225}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2695.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [2659.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4973.8125, 24512.0], "load": 0.03, "temperature": null, "power": null}, "1": {"memory": [4955.8125, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08788755536079407}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4981.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [4963.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08937834203243256}, "pipe": "data"}
+{"event": "data", "data": {"rate": 371425.095795438, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4981.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5367.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08813147246837616}, "pipe": "data"}
+{"event": "data", "data": {"rate": 377784.3698403836, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5183.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5367.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08771650493144989}, "pipe": "data"}
+{"event": "data", "data": {"rate": 376599.89734067005, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5183.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5367.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08742949366569519}, "pipe": "data"}
+{"event": "data", "data": {"rate": 380970.32751775376, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5367.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08757737278938293}, "pipe": "data"}
+{"event": "data", "data": {"rate": 377836.4252393994, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5367.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08654382824897766}, "pipe": "data"}
+{"event": "data", "data": {"rate": 375045.5186776056, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0858154445886612}, "pipe": "data"}
+{"event": "data", "data": {"rate": 370671.1267259742, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08628799766302109}, "pipe": "data"}
+{"event": "data", "data": {"rate": 378051.5421037138, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08730762451887131}, "pipe": "data"}
+{"event": "data", "data": {"rate": 382371.943906708, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08590050786733627}, "pipe": "data"}
+{"event": "data", "data": {"rate": 372934.675634125, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08602667599916458}, "pipe": "data"}
+{"event": "data", "data": {"rate": 380600.03742350225, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08581672608852386}, "pipe": "data"}
+{"event": "data", "data": {"rate": 371078.53237081517, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08588778972625732}, "pipe": "data"}
+{"event": "data", "data": {"rate": 376734.87398213864, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08632193505764008}, "pipe": "data"}
+{"event": "data", "data": {"rate": 384627.1947390212, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08664406836032867}, "pipe": "data"}
+{"event": "data", "data": {"rate": 380136.2741211303, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0852133184671402}, "pipe": "data"}
+{"event": "data", "data": {"rate": 373562.4662382433, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08576950430870056}, "pipe": "data"}
+{"event": "data", "data": {"rate": 381554.8237970788, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08471724390983582}, "pipe": "data"}
+{"event": "data", "data": {"rate": 379318.91418293887, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08532554656267166}, "pipe": "data"}
+{"event": "data", "data": {"rate": 380023.48472767865, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08427020162343979}, "pipe": "data"}
+{"event": "data", "data": {"rate": 381719.00527997245, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08591149747371674}, "pipe": "data"}
+{"event": "data", "data": {"rate": 381323.215296684, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0845126137137413}, "pipe": "data"}
+{"event": "data", "data": {"rate": 375919.06176255655, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0840827077627182}, "pipe": "data"}
+{"event": "data", "data": {"rate": 383527.7185739894, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08428709208965302}, "pipe": "data"}
+{"event": "data", "data": {"rate": 375731.72974651406, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08434845507144928}, "pipe": "data"}
+{"event": "data", "data": {"rate": 386745.5702536759, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08465415984392166}, "pipe": "data"}
+{"event": "data", "data": {"rate": 382363.8055317849, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08376755565404892}, "pipe": "data"}
+{"event": "data", "data": {"rate": 375980.672977867, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.083040751516819}, "pipe": "data"}
+{"event": "data", "data": {"rate": 387707.165043277, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08479100465774536}, "pipe": "data"}
+{"event": "data", "data": {"rate": 384212.8839543423, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08379324525594711}, "pipe": "data"}
+{"event": "data", "data": {"rate": 379524.0211904309, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08349813520908356}, "pipe": "data"}
+{"event": "data", "data": {"rate": 382142.0878639916, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08437865972518921}, "pipe": "data"}
+{"event": "data", "data": {"rate": 376865.15358129086, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08468881249427795}, "pipe": "data"}
+{"event": "data", "data": {"rate": 380369.1977097986, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08403709530830383}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 377831.3628211679, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08420669287443161}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 379886.4500295244, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08320620656013489}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 371345.4072569379, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08308559656143188}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 376269.65747151565, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08385886251926422}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 375092.7086134967, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08402976393699646}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 379473.8025035265, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08415201306343079}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 369139.1555031587, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08402085304260254}, "pipe": "data"}
+{"event": "data", "data": {"rate": 377018.469187657, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08228246122598648}, "pipe": "data"}
+{"event": "data", "data": {"rate": 380442.850431833, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08508102595806122}, "pipe": "data"}
+{"event": "data", "data": {"rate": 380623.6452450096, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08269783854484558}, "pipe": "data"}
+{"event": "data", "data": {"rate": 381914.9561826947, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08433524519205093}, "pipe": "data"}
+{"event": "data", "data": {"rate": 374173.10347631114, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08258543908596039}, "pipe": "data"}
+{"event": "data", "data": {"rate": 382431.13601934566, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08313082903623581}, "pipe": "data"}
+{"event": "data", "data": {"rate": 381339.51930086757, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08317264169454575}, "pipe": "data"}
+{"event": "data", "data": {"rate": 376029.14280939166, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08357995748519897}, "pipe": "data"}
+{"event": "data", "data": {"rate": 383762.0944846918, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08405735343694687}, "pipe": "data"}
+{"event": "data", "data": {"rate": 383303.0711690678, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08272015303373337}, "pipe": "data"}
+{"event": "data", "data": {"rate": 375138.7164930135, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08338481932878494}, "pipe": "data"}
+{"event": "data", "data": {"rate": 386663.1241426989, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08422383666038513}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 382722.0521975041, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08405455201864243}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 383164.9160133561, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08417406678199768}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 384577.09699264885, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08274278044700623}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 381269.55594709533, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08461865782737732}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 380308.2144601289, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08379694819450378}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 383034.0688745473, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08401922881603241}, "pipe": "data"}
+{"event": "data", "data": {"rate": 384572.2495162216, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08402209728956223}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 381272.69682574656, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08343112468719482}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 377910.14628079697, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712256281.3275568, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D0.data
new file mode 100644
index 000000000..d9c93e03f
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D0.data
@@ -0,0 +1,243 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "focalnet_base_lrf"}, "tags": ["classification", "convnet", "vision"], "weight": 2.0, "name": "focalnet", "tag": ["focalnet", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255930.817689, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712255933.1109104}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model focalnet_base_lrf created, param count:88749768\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.9\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.05) calculated from base learning rate (0.1) and global batch size (128) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 7.004446029663086}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5645.8125, 24512.0], "load": 0.36, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.004 (7.00) Time: 17.246s, 7.42/s (17.246s, 7.42/s) LR: 1.000e-05 Data: 0.572 (0.572)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23613.8125, 24512.0], "load": 0.94, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [11819.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [9069.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [8879.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [8291.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.006705284118652}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0131425857543945}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24451.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.935508728027344}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.996306419372559}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24451.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 156.1831624908965, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.995440483093262}, "pipe": "data"}
+{"event": "data", "data": {"rate": 168.9586201588784, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98853874206543}, "pipe": "data"}
+{"event": "data", "data": {"rate": 159.01465219145024, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24453.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.060269355773926}, "pipe": "data"}
+{"event": "data", "data": {"rate": 176.27258397976726, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 145.19968058021394, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.003750801086426}, "pipe": "data"}
+{"event": "data", "data": {"rate": 172.14520218366917, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24453.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.056220054626465}, "pipe": "data"}
+{"event": "data", "data": {"rate": 151.83865069816747, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.990062713623047}, "pipe": "data"}
+{"event": "data", "data": {"rate": 175.40664860799035, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 145.45153278967254, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24453.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.029505729675293}, "pipe": "data"}
+{"event": "data", "data": {"rate": 175.45757433453755, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9779558181762695}, "pipe": "data"}
+{"event": "data", "data": {"rate": 145.02238583108993, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.975492477416992}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24453.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 175.56470216716664, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 144.91141828218414, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.979888916015625}, "pipe": "data"}
+{"event": "data", "data": {"rate": 176.48576057662927, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.069397926330566}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24455.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 142.77113886157207, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.005284309387207}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.005 (7.00) Time: 0.723s, 177.16/s (1.253s, 102.19/s) LR: 1.000e-05 Data: 0.000 (0.026)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 167.20304259742375, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.810 (0.810) Loss: 6.9615 (6.9615) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 1.168 (0.296) Loss: 6.8639 (6.9459) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 0.6541)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D0/20240404-183857-focalnet_base_lrf-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 144.034754776132, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6313.8125, 24512.0], "load": 0.83, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6313.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6313.8125, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4565.8125, 24512.0], "load": 0.82, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.020685195922852}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.021 (7.02) Time: 1.279s, 100.05/s (1.279s, 100.05/s) LR: 1.001e-02 Data: 0.458 (0.458)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 155.5760635598423, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.960961818695068}, "pipe": "data"}
+{"event": "data", "data": {"rate": 143.27522267530088, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24357.8125, 24512.0], "load": 0.68, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0037946701049805}, "pipe": "data"}
+{"event": "data", "data": {"rate": 142.41033852596325, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 130.77803795489464, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.034621238708496}, "pipe": "data"}
+{"event": "data", "data": {"rate": 167.05587989425635, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [10173.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 111.55162576995241, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.963570594787598}, "pipe": "data"}
+{"event": "data", "data": {"rate": 144.10494127118514, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.019160270690918}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24467.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 136.77740915494263, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.061408042907715}, "pipe": "data"}
+{"event": "data", "data": {"rate": 171.38995765367892, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 139.07770271576695, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.034209728240967}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24469.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 176.73007257043497, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.999085903167725}, "pipe": "data"}
+{"event": "data", "data": {"rate": 140.73386821241007, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.063625335693359}, "pipe": "data"}
+{"event": "data", "data": {"rate": 156.30374000005074, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [22773.8125, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 118.87836763016499, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.087857246398926}, "pipe": "data"}
+{"event": "data", "data": {"rate": 146.87189445188082, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 148.59329772368477, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.993795871734619}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24457.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 175.54020096884477, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.007907390594482}, "pipe": "data"}
+{"event": "data", "data": {"rate": 132.59430062894253, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.070873260498047}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24451.8125, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 167.07431821359702, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 138.12219783714386, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.197940826416016}, "pipe": "data"}
+{"event": "data", "data": {"rate": 166.13797086400245, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24311.8125, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 116.65194822861136, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.147008895874023}, "pipe": "data"}
+{"event": "data", "data": {"rate": 167.51212007607452, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1084442138671875}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 7.108 (7.05) Time: 0.724s, 176.79/s (0.821s, 155.99/s) LR: 1.001e-02 Data: 0.000 (0.026)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 139.39110599911172, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.770 (0.770) Loss: 6.8921 (6.8921) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.067 (0.262) Loss: 6.9395 (6.9699) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D0/20240404-183857-focalnet_base_lrf-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4631.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 176.76094961307055, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6173.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6173.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6173.8125, 24512.0], "load": 0.71, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.995844841003418}, "pipe": "data"}
+{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.996 (7.00) Time: 1.241s, 103.11/s (1.241s, 103.11/s) LR: 2.001e-02 Data: 0.426 (0.426)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 133.72496430340055, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03409481048584}, "pipe": "data"}
+{"event": "data", "data": {"rate": 157.239503859884, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24431.8125, 24512.0], "load": 0.8, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0975728034973145}, "pipe": "data"}
+{"event": "data", "data": {"rate": 146.63419993022822, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.019063949584961}, "pipe": "data"}
+{"event": "data", "data": {"rate": 157.34490851642497, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24433.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 134.12059622527286, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0097246170043945}, "pipe": "data"}
+{"event": "data", "data": {"rate": 174.23972461465974, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.088225364685059}, "pipe": "data"}
+{"event": "data", "data": {"rate": 137.43146636351116, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24433.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.010916709899902}, "pipe": "data"}
+{"event": "data", "data": {"rate": 175.27376516031114, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 137.7812325799918, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.101739406585693}, "pipe": "data"}
+{"event": "data", "data": {"rate": 174.27403734754023, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24433.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.231767654418945}, "pipe": "data"}
+{"event": "data", "data": {"rate": 137.11038524135242, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.292592525482178}, "pipe": "data"}
+{"event": "data", "data": {"rate": 174.9156869321893, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24433.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 137.36997214432094, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.112415313720703}, "pipe": "data"}
+{"event": "data", "data": {"rate": 173.80298887533507, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.197756767272949}, "pipe": "data"}
+{"event": "data", "data": {"rate": 136.54599227559748, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24433.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.235831260681152}, "pipe": "data"}
+{"event": "data", "data": {"rate": 174.0547052075538, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 135.98386842916506, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.275215148925781}, "pipe": "data"}
+{"event": "data", "data": {"rate": 172.65723573592533, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24433.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.224541664123535}, "pipe": "data"}
+{"event": "data", "data": {"rate": 136.7135274497949, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "stop", "data": null, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712256046.0211246, "return_code": -15}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D1.data
new file mode 100644
index 000000000..5a1319f7f
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D1.data
@@ -0,0 +1,253 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "focalnet_base_lrf"}, "tags": ["classification", "convnet", "vision"], "weight": 2.0, "name": "focalnet", "tag": ["focalnet", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255933.09558, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D1", "--checkpoint-hist", "1"], "time": 1712255933.1184373}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model focalnet_base_lrf created, param count:88749768\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.9\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.05) calculated from base learning rate (0.1) and global batch size (128) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 7.004453659057617}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5645.8125, 24512.0], "load": 0.35, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.004 (7.00) Time: 17.370s, 7.37/s (17.370s, 7.37/s) LR: 1.000e-05 Data: 0.579 (0.579)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23531.8125, 24512.0], "load": 0.94, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [13601.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [11127.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [10937.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [10637.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.006714820861816}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.013139724731445}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.036353588104248}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [17027.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9829301834106445}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.996297359466553}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24241.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 115.98848552582137, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.995445728302002}, "pipe": "data"}
+{"event": "data", "data": {"rate": 143.35521037626873, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.995490550994873}, "pipe": "data"}
+{"event": "data", "data": {"rate": 151.07365948930976, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [18917.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.970373153686523}, "pipe": "data"}
+{"event": "data", "data": {"rate": 133.24616771555847, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 171.80614457696456, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.009486198425293}, "pipe": "data"}
+{"event": "data", "data": {"rate": 109.52578798411592, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24229.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.941388130187988}, "pipe": "data"}
+{"event": "data", "data": {"rate": 173.86886058959016, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 115.87421045508268, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.961498260498047}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24453.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 143.38121255069655, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.990070819854736}, "pipe": "data"}
+{"event": "data", "data": {"rate": 144.5995550692025, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.029526233673096}, "pipe": "data"}
+{"event": "data", "data": {"rate": 132.0436047141948, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24427.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.946541786193848}, "pipe": "data"}
+{"event": "data", "data": {"rate": 173.38762901915706, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 108.81581815830793, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03203821182251}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.975484371185303}, "pipe": "data"}
+{"event": "data", "data": {"rate": 172.29524914479907, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [20309.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.979880332946777}, "pipe": "data"}
+{"event": "data", "data": {"rate": 141.1807501037705, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.980224609375}, "pipe": "data"}
+{"event": "data", "data": {"rate": 169.08453500743647, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24231.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 105.37381383095436, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.979129314422607}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.005289077758789}, "pipe": "data"}
+{"event": "data", "data": {"rate": 170.7990981385958, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.005 (7.00) Time: 1.059s, 120.84/s (1.373s, 93.21/s) LR: 1.000e-05 Data: 0.000 (0.037)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.750 (0.750) Loss: 6.9615 (6.9615) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 1.178 (0.293) Loss: 6.8639 (6.9459) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 0.6541)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D1/20240404-183857-focalnet_base_lrf-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [11701.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 120.76732395043811, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [11945.8125, 24512.0], "load": 0.86, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [11945.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [8217.8125, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.020687103271484}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.021 (7.02) Time: 1.248s, 102.53/s (1.248s, 102.53/s) LR: 1.001e-02 Data: 0.442 (0.442)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24245.8125, 24512.0], "load": 0.55, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 136.4557667811463, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.017704486846924}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.003789901733398}, "pipe": "data"}
+{"event": "data", "data": {"rate": 161.30785485007573, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 116.98455657074206, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0345940589904785}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24321.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 141.95076724644764, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0620293617248535}, "pipe": "data"}
+{"event": "data", "data": {"rate": 162.53874838047597, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.065659523010254}, "pipe": "data"}
+{"event": "data", "data": {"rate": 128.57425847507284, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24427.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.019184589385986}, "pipe": "data"}
+{"event": "data", "data": {"rate": 172.53952896110098, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 108.6217930442419, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.061338901519775}, "pipe": "data"}
+{"event": "data", "data": {"rate": 172.91723824200844, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.077922344207764}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [10109.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0374860763549805}, "pipe": "data"}
+{"event": "data", "data": {"rate": 132.9027164186082, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.999045372009277}, "pipe": "data"}
+{"event": "data", "data": {"rate": 171.79069501659433, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24255.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 107.25865138624613, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0636396408081055}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.077389717102051}, "pipe": "data"}
+{"event": "data", "data": {"rate": 172.08750011312569, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [11651.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.036355495452881}, "pipe": "data"}
+{"event": "data", "data": {"rate": 138.26715500832663, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9937944412231445}, "pipe": "data"}
+{"event": "data", "data": {"rate": 170.3041171311696, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [19013.8125, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 99.81872724174447, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0078935623168945}, "pipe": "data"}
+{"event": "data", "data": {"rate": 163.18666430419972, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.098411560058594}, "pipe": "data"}
+{"event": "data", "data": {"rate": 155.33655016720704, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24243.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.043401718139648}, "pipe": "data"}
+{"event": "data", "data": {"rate": 138.53100217684747, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.197937488555908}, "pipe": "data"}
+{"event": "data", "data": {"rate": 158.869438605572, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 107.38729979115578, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.147041320800781}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24389.8125, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 173.4800351890698, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9818925857543945}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 7.108 (7.05) Time: 0.737s, 173.70/s (0.885s, 144.66/s) LR: 1.001e-02 Data: 0.000 (0.033)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.602 (0.602) Loss: 6.8922 (6.8922) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.067 (0.256) Loss: 6.9393 (6.9700) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D1/20240404-183857-focalnet_base_lrf-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 141.3450924514315, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24343.8125, 24512.0], "load": 0.86, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24417.8125, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24417.8125, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24417.8125, 24512.0], "load": 0.65, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.995919227600098}, "pipe": "data"}
+{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.996 (7.00) Time: 1.448s, 88.39/s (1.448s, 88.39/s) LR: 2.001e-02 Data: 0.471 (0.471)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 128.8976399421159, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.034114360809326}, "pipe": "data"}
+{"event": "data", "data": {"rate": 168.03100513357694, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24279.8125, 24512.0], "load": 0.92, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1502685546875}, "pipe": "data"}
+{"event": "data", "data": {"rate": 73.15334612043536, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.097559928894043}, "pipe": "data"}
+{"event": "data", "data": {"rate": 158.33936876412403, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24441.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.019028663635254}, "pipe": "data"}
+{"event": "data", "data": {"rate": 144.59166895573824, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 138.65953494458603, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0096940994262695}, "pipe": "data"}
+{"event": "data", "data": {"rate": 162.1534673672443, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24447.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.088198661804199}, "pipe": "data"}
+{"event": "data", "data": {"rate": 141.75870888896802, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.160571098327637}, "pipe": "data"}
+{"event": "data", "data": {"rate": 159.92693657938904, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 104.13960070986302, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.106841087341309}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24427.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 175.52545067508473, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.141146659851074}, "pipe": "data"}
+{"event": "data", "data": {"rate": 138.59470403454478, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.052305698394775}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24429.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 176.52135102210215, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 138.84214160154892, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.252406120300293}, "pipe": "data"}
+{"event": "data", "data": {"rate": 176.64217808990978, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08305025100708}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24429.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 139.38617068063758, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.190241813659668}, "pipe": "data"}
+{"event": "data", "data": {"rate": 176.09690045620206, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 138.06042983535764, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.183157920837402}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24429.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 175.24722380861084, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "stop", "data": null, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D1", "--checkpoint-hist", "1"], "time": 1712256051.804079, "return_code": -15}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp16.D0.data
new file mode 100644
index 000000000..96bd83b08
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp16.D0.data
@@ -0,0 +1,220 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 30, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp16"}, "weight": 0.0, "name": "fp16", "tag": ["fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0.02, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255396.49982, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712255398.7885}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 92.7523917431756, "units": "Tflops", "t": 1712255400.769592}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2410.3125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255399.9138083}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.07, "temperature": null, "power": null}}, "t": 1712255400.419759}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.38028414920187, "units": "Tflops", "t": 1712255401.4619455}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.27, "temperature": null, "power": null}}, "t": 1712255400.9253342}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.4, "temperature": null, "power": null}}, "t": 1712255401.430599}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.34183232162187, "units": "Tflops", "t": 1712255402.1540887}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.6, "temperature": null, "power": null}}, "t": 1712255401.9360483}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 96.02837774345825, "units": "Tflops", "t": 1712255402.8416667}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.73, "temperature": null, "power": null}}, "t": 1712255402.4414482}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.08794347175838, "units": "Tflops", "t": 1712255403.5355804}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.93, "temperature": null, "power": null}}, "t": 1712255402.9468606}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255403.4521775}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.29681409851152, "units": "Tflops", "t": 1712255404.2281904}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255403.9573402}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.4878508372297, "units": "Tflops", "t": 1712255404.9196634}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255404.462785}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.2403634007459, "units": "Tflops", "t": 1712255405.6124618}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255404.9682617}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255405.4735107}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.00478836738485, "units": "Tflops", "t": 1712255406.3071556}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255405.9788024}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.16211233059299, "units": "Tflops", "t": 1712255407.0006332}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255406.4843073}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255406.9896882}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.03046701273247, "units": "Tflops", "t": 1712255407.6951482}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255407.4949334}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.88428113393039, "units": "Tflops", "t": 1712255408.3906512}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255408.0002701}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.85455163008706, "units": "Tflops", "t": 1712255409.0864003}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255408.5055943}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255409.0108666}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.89957599090012, "units": "Tflops", "t": 1712255409.7818384}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255409.5161426}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.81067450149848, "units": "Tflops", "t": 1712255410.4779058}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255410.0214255}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.49556624211912, "units": "Tflops", "t": 1712255411.1761541}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255410.5267718}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255411.0322652}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.58646407597354, "units": "Tflops", "t": 1712255411.8738847}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255411.5376697}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.67850882338522, "units": "Tflops", "t": 1712255412.570939}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255412.0432758}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255412.5487695}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.56254365191127, "units": "Tflops", "t": 1712255413.2687232}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255413.054106}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.47691730850947, "units": "Tflops", "t": 1712255413.9672155}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255413.5594823}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.41131740448215, "units": "Tflops", "t": 1712255414.6660929}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255414.064878}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255414.5701962}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.24031660309983, "units": "Tflops", "t": 1712255415.3664112}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255415.0758157}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.9745359982242, "units": "Tflops", "t": 1712255416.0685322}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255415.5812733}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.17116655894259, "units": "Tflops", "t": 1712255416.769187}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255416.0866477}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255416.5919538}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.23405813186558, "units": "Tflops", "t": 1712255417.4695296}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255417.0973232}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.86773867414098, "units": "Tflops", "t": 1712255418.1724594}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255417.602632}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255418.107981}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.04863966730076, "units": "Tflops", "t": 1712255418.8741972}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255418.6133192}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.09251806287995, "units": "Tflops", "t": 1712255419.5754375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255419.1190927}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.95475220111709, "units": "Tflops", "t": 1712255420.2777157}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255419.6245365}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255420.1299493}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.74437980091288, "units": "Tflops", "t": 1712255420.9817104}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255420.6352532}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.75340048819439, "units": "Tflops", "t": 1712255421.6854718}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255421.1406293}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255421.6459143}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.8057483682863, "units": "Tflops", "t": 1712255422.3888752}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255422.1523018}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.69914374365054, "units": "Tflops", "t": 1712255423.0930595}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255422.6577103}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.73355091676696, "units": "Tflops", "t": 1712255423.7971275}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255423.1632264}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255423.6684918}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.24995841535832, "units": "Tflops", "t": 1712255424.5047314}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255424.1737797}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.86779068743131, "units": "Tflops", "t": 1712255425.2152245}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255424.679123}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255425.184458}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.82121679001737, "units": "Tflops", "t": 1712255425.9262137}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255425.6900592}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.73849600228822, "units": "Tflops", "t": 1712255426.637683}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255426.1955636}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.77780031553147, "units": "Tflops", "t": 1712255427.3488507}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255426.7011583}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255427.207792}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.7771470445031, "units": "Tflops", "t": 1712255428.0600748}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255427.7144244}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.66585212562694, "units": "Tflops", "t": 1712255428.772114}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255428.2211819}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255428.7288618}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.68096782561825, "units": "Tflops", "t": 1712255429.4842148}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255429.2375152}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.5469300287417, "units": "Tflops", "t": 1712255430.1971722}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255429.7460918}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.47554391588781, "units": "Tflops", "t": 1712255430.91068}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255430.254856}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255430.7634194}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.6561397094833, "units": "Tflops", "t": 1712255431.6228282}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255431.2721915}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.44211571139982, "units": "Tflops", "t": 1712255432.3365848}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255431.7809808}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255432.2894936}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.93963611914799, "units": "Tflops", "t": 1712255433.0544062}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255432.798075}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.19462984549327, "units": "Tflops", "t": 1712255433.7700806}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255433.306659}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.41596465083921, "units": "Tflops", "t": 1712255434.484032}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255433.8152964}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255434.32396}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.24543553886119, "units": "Tflops", "t": 1712255435.199348}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255434.8323677}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.14538366074498, "units": "Tflops", "t": 1712255435.9154084}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255435.341082}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255435.849765}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.03810870817333, "units": "Tflops", "t": 1712255436.6323333}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255436.358606}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.15360816518283, "units": "Tflops", "t": 1712255437.3484674}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255436.8673189}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.16257086384087, "units": "Tflops", "t": 1712255438.064383}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255437.3759212}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255437.8843844}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.913035786688, "units": "Tflops", "t": 1712255438.782285}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255438.3931031}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.31655909424165, "units": "Tflops", "t": 1712255439.4970264}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255438.9014273}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255439.4100592}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.28362759715372, "units": "Tflops", "t": 1712255440.2043805}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255439.9188275}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.4166239386077, "units": "Tflops", "t": 1712255440.9106872}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255440.427361}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.95405230453973, "units": "Tflops", "t": 1712255441.620634}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255440.9359872}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255441.4452436}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.0262106571359, "units": "Tflops", "t": 1712255442.3299453}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255441.954898}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.31835966290977, "units": "Tflops", "t": 1712255443.036986}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255442.4645197}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255442.9743228}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.11261029714248, "units": "Tflops", "t": 1712255443.745626}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255443.4839365}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.08632086428433, "units": "Tflops", "t": 1712255444.4621625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255443.9936693}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.90876448168352, "units": "Tflops", "t": 1712255445.172344}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255444.5033863}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255445.0129802}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.88397008176685, "units": "Tflops", "t": 1712255445.8827431}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255445.5226243}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.74866093405363, "units": "Tflops", "t": 1712255446.594287}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255446.0324113}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255446.5421023}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.55736261315175, "units": "Tflops", "t": 1712255447.3072171}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255447.0520995}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.85333064393474, "units": "Tflops", "t": 1712255448.0178094}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255447.561885}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.44881795164859, "units": "Tflops", "t": 1712255448.731512}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255448.0715935}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255448.5813105}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.8619624988399, "units": "Tflops", "t": 1712255449.4420943}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255449.0910022}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.25380094588698, "units": "Tflops", "t": 1712255450.1573052}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255449.600829}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255450.1105406}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.42522543565042, "units": "Tflops", "t": 1712255450.8712175}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255450.620416}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.23421228434661, "units": "Tflops", "t": 1712255451.5865765}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255451.1301012}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.73277726486148, "units": "Tflops", "t": 1712255452.298232}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255451.6397965}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255452.1494653}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.5499326383956, "units": "Tflops", "t": 1712255453.0111861}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255452.6591728}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.56572276085, "units": "Tflops", "t": 1712255453.7239933}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255453.1688535}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255453.6786718}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.61836443605067, "units": "Tflops", "t": 1712255454.436435}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255454.1893733}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.61737239747556, "units": "Tflops", "t": 1712255455.1488369}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255454.6990778}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.56432929802386, "units": "Tflops", "t": 1712255455.8616416}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255455.2088258}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255455.7185621}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.3846152401066, "units": "Tflops", "t": 1712255456.5758774}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255456.2282014}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.5823546553951, "units": "Tflops", "t": 1712255457.2885575}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255456.7379234}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255457.2477999}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.30461030926529, "units": "Tflops", "t": 1712255458.0034006}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255457.757414}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.19487559429736, "units": "Tflops", "t": 1712255458.7190769}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255458.267144}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.25663076518778, "units": "Tflops", "t": 1712255459.4344037}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255458.7770362}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255459.2868426}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.48599132421646, "units": "Tflops", "t": 1712255460.1478708}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255459.796605}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.84131305164908, "units": "Tflops", "t": 1712255460.8663042}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255460.306355}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255460.816263}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.00135561139192, "units": "Tflops", "t": 1712255461.5835097}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255461.3261979}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.21905753839916, "units": "Tflops", "t": 1712255462.2989829}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255461.8358598}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.31622029742022, "units": "Tflops", "t": 1712255463.013707}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255462.345675}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255462.855472}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.07219504770241, "units": "Tflops", "t": 1712255463.7303734}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255463.3652358}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712255464.2598617, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp16.D1.data
new file mode 100644
index 000000000..c6897ad33
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp16.D1.data
@@ -0,0 +1,220 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 30, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp16"}, "weight": 0.0, "name": "fp16", "tag": ["fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0.02, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255398.780101, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712255398.7889414}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 92.40534790138327, "units": "Tflops", "t": 1712255400.7453833}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255399.89603}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.08, "temperature": null, "power": null}}, "t": 1712255400.4021287}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.44302386018786, "units": "Tflops", "t": 1712255401.437205}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.21, "temperature": null, "power": null}}, "t": 1712255400.9076393}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.41, "temperature": null, "power": null}}, "t": 1712255401.4135976}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.83009610178632, "units": "Tflops", "t": 1712255402.1257973}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.61, "temperature": null, "power": null}}, "t": 1712255401.9190102}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.44799524026035, "units": "Tflops", "t": 1712255402.8180318}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.74, "temperature": null, "power": null}}, "t": 1712255402.4249086}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.338744374932, "units": "Tflops", "t": 1712255403.5101178}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.94, "temperature": null, "power": null}}, "t": 1712255402.9307432}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255403.4365292}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.8885762128826, "units": "Tflops", "t": 1712255404.2055328}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255403.9423325}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.85962449378471, "units": "Tflops", "t": 1712255404.9016497}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255404.4480522}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.59655307115918, "units": "Tflops", "t": 1712255405.599288}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255404.9536836}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255405.4594023}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.69889031492922, "units": "Tflops", "t": 1712255406.2960696}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255405.9650977}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.67931873114617, "units": "Tflops", "t": 1712255406.992963}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255406.470816}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255406.976712}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.33210663664435, "units": "Tflops", "t": 1712255407.6924787}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255407.4822276}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.54079949871489, "units": "Tflops", "t": 1712255408.3905416}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255407.9878407}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.71915088784482, "units": "Tflops", "t": 1712255409.0872548}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255408.4933822}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255408.9990942}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.26641857440723, "units": "Tflops", "t": 1712255409.7873833}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255409.5046654}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.61000841319243, "units": "Tflops", "t": 1712255410.4849021}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255410.0102322}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.08595928704538, "units": "Tflops", "t": 1712255411.1861985}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255410.5159712}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255411.0217168}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.01502300950892, "units": "Tflops", "t": 1712255411.8882122}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255411.5272825}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.10349405985436, "units": "Tflops", "t": 1712255412.5894794}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255412.0329535}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255412.5385776}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.17844241886912, "units": "Tflops", "t": 1712255413.290129}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255413.0442328}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.02745070312777, "units": "Tflops", "t": 1712255413.991996}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255413.5496633}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.87305685136205, "units": "Tflops", "t": 1712255414.694878}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255414.0553644}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255414.5610058}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.80791092172524, "units": "Tflops", "t": 1712255415.3983996}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255415.066771}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.80638440306514, "units": "Tflops", "t": 1712255416.101779}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255415.572423}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255416.078242}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.53551650881191, "units": "Tflops", "t": 1712255416.8073745}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255416.5838513}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.99863881086047, "units": "Tflops", "t": 1712255417.5093246}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255417.0895095}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.63357350361801, "units": "Tflops", "t": 1712255418.2141116}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255417.5950634}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255418.1007984}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.68328178764803, "units": "Tflops", "t": 1712255418.9184465}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255418.606383}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.81882061579176, "units": "Tflops", "t": 1712255419.6217406}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255419.111954}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255419.617523}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.66111577292718, "units": "Tflops", "t": 1712255420.3263981}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255420.1230996}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.62083790996782, "units": "Tflops", "t": 1712255421.031169}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255420.628865}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.67243530235666, "units": "Tflops", "t": 1712255421.735551}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255421.134295}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255421.6398802}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.67278412682683, "units": "Tflops", "t": 1712255422.440094}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255422.1455631}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.36713514444455, "units": "Tflops", "t": 1712255423.1467912}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255422.651164}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.41425863104064, "units": "Tflops", "t": 1712255423.8531208}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255423.1567166}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255423.6623175}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.41198804846171, "units": "Tflops", "t": 1712255424.5595074}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255424.1679072}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.42819990432454, "units": "Tflops", "t": 1712255425.265889}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255424.6735702}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255425.1793377}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.96804395842739, "units": "Tflops", "t": 1712255425.9756455}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255425.6850648}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.15511763709628, "units": "Tflops", "t": 1712255426.683952}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255426.19075}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.85052641552609, "units": "Tflops", "t": 1712255427.394575}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255426.6975405}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255427.2042375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.33718366624466, "units": "Tflops", "t": 1712255428.1016343}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255427.7108784}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.87891926101426, "units": "Tflops", "t": 1712255428.8120377}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255428.2188647}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255428.7275236}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.83576027392415, "units": "Tflops", "t": 1712255429.5228136}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255429.236183}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.43566147698782, "units": "Tflops", "t": 1712255430.2366183}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255429.7447622}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.00679283055533, "units": "Tflops", "t": 1712255430.946046}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255430.253511}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255430.7620747}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.85990566575383, "units": "Tflops", "t": 1712255431.6567812}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255431.270854}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.01826744224479, "units": "Tflops", "t": 1712255432.3661268}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255431.7795641}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255432.2881606}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.00517950266314, "units": "Tflops", "t": 1712255433.0833066}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255432.7967415}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.85205314109872, "units": "Tflops", "t": 1712255433.793908}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255433.3053286}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.7734764545604, "units": "Tflops", "t": 1712255434.5051248}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255433.81396}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255434.3225067}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.70521911295624, "units": "Tflops", "t": 1712255435.2170057}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255434.8310323}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.63414690758026, "units": "Tflops", "t": 1712255435.929278}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255435.3397274}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255435.848439}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.56847884420803, "units": "Tflops", "t": 1712255436.6421092}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255436.357246}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.74295062597577, "units": "Tflops", "t": 1712255437.3459597}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255436.8658764}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.55958442597641, "units": "Tflops", "t": 1712255438.0511887}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255437.3745108}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255437.8830457}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.78381044891762, "units": "Tflops", "t": 1712255438.754782}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255438.3917675}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.38140901244707, "units": "Tflops", "t": 1712255439.4613802}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255438.900088}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255439.4087217}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.53912116380273, "units": "Tflops", "t": 1712255440.1669505}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255439.917489}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.70872696725468, "units": "Tflops", "t": 1712255440.871061}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255440.4260275}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.67513083169872, "units": "Tflops", "t": 1712255441.5754418}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255440.9346428}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255441.445216}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.72037544409226, "units": "Tflops", "t": 1712255442.2795045}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255441.9549391}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.71570935776161, "units": "Tflops", "t": 1712255442.9835644}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255442.464548}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255442.9743505}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.61877899755696, "units": "Tflops", "t": 1712255443.6884096}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255443.4840288}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.47993250880008, "units": "Tflops", "t": 1712255444.3943682}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255443.993698}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.39146320744268, "units": "Tflops", "t": 1712255445.1008766}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255444.503415}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255445.0130074}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.5479758817099, "units": "Tflops", "t": 1712255445.8062387}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255445.52265}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.1619864010478, "units": "Tflops", "t": 1712255446.5144856}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255446.0324392}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.12238731065332, "units": "Tflops", "t": 1712255447.2230337}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255446.5421906}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255447.0521271}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.41880012736345, "units": "Tflops", "t": 1712255447.929373}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255447.561914}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.90895165955888, "units": "Tflops", "t": 1712255448.6395512}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255448.071647}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255448.5813386}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.93260453216449, "units": "Tflops", "t": 1712255449.349593}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255449.0910294}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.9579870315059, "units": "Tflops", "t": 1712255450.0593996}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255449.600858}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.81533217842181, "units": "Tflops", "t": 1712255450.7704504}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255450.1105695}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255450.620444}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.80758057510381, "units": "Tflops", "t": 1712255451.4814398}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255451.130142}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.74909617965268, "units": "Tflops", "t": 1712255452.1928413}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255451.6398253}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255452.1494927}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.70459792293097, "units": "Tflops", "t": 1712255452.9046192}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255452.6592133}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.70105729874315, "units": "Tflops", "t": 1712255453.6163921}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255453.1689467}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.63749633591054, "units": "Tflops", "t": 1712255454.3286526}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255453.6787007}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255454.1894336}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.63690706707268, "units": "Tflops", "t": 1712255455.0409474}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255454.6991057}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.58213781385389, "units": "Tflops", "t": 1712255455.7536378}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255455.2088523}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255455.71859}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.50918187577987, "units": "Tflops", "t": 1712255456.467034}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255456.2282295}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.61402442417489, "units": "Tflops", "t": 1712255457.1794703}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255456.737953}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.21583049743126, "units": "Tflops", "t": 1712255457.8949811}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255457.247826}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255457.757441}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.00453706671458, "units": "Tflops", "t": 1712255458.612179}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255458.2671707}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.95396575340398, "units": "Tflops", "t": 1712255459.3297238}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255458.7770627}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255459.2868712}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.23934696871284, "units": "Tflops", "t": 1712255460.045095}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255459.796631}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.14427898840967, "units": "Tflops", "t": 1712255460.7611742}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255460.3063846}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.85378250617472, "units": "Tflops", "t": 1712255461.4795084}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255460.8163052}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255461.3262272}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.0702649581587, "units": "Tflops", "t": 1712255462.1961985}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255461.8359008}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.1872887041371, "units": "Tflops", "t": 1712255462.911933}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255462.3457048}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255462.8554997}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.07418649474786, "units": "Tflops", "t": 1712255463.6286023}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255463.3652637}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712255464.3165908, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp32.D0.data
new file mode 100644
index 000000000..0e339db77
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp32.D0.data
@@ -0,0 +1,345 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32"}, "weight": 0.0, "name": "fp32", "tag": ["fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255545.340192, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712255547.6680193}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 15.910726942115057, "units": "Tflops", "t": 1712255550.303062}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255548.7817533}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.11, "temperature": null, "power": null}}, "t": 1712255549.2890816}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.24, "temperature": null, "power": null}}, "t": 1712255549.79447}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.44, "temperature": null, "power": null}}, "t": 1712255550.300143}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.856185806535132, "units": "Tflops", "t": 1712255551.6907296}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.57, "temperature": null, "power": null}}, "t": 1712255550.8055346}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.77, "temperature": null, "power": null}}, "t": 1712255551.3109431}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.791533519580378, "units": "Tflops", "t": 1712255553.0845685}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.9, "temperature": null, "power": null}}, "t": 1712255551.816372}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712255552.3217378}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712255552.827104}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.814345117151015, "units": "Tflops", "t": 1712255554.4760323}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712255553.3326104}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712255553.838185}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712255554.3435953}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.760160026503572, "units": "Tflops", "t": 1712255555.8718276}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255554.8491783}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255555.3545823}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255555.8599298}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.73176006759587, "units": "Tflops", "t": 1712255557.2701175}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255556.3653371}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255556.8707101}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.706762229774338, "units": "Tflops", "t": 1712255558.6704862}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255557.3763957}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255557.8817732}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255558.387393}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.7089852637417, "units": "Tflops", "t": 1712255560.0706732}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255558.8928373}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255559.3988273}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255559.9042041}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.70406119425526, "units": "Tflops", "t": 1712255561.4713368}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255560.4097006}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255560.9151978}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255561.4206011}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.708875568608, "units": "Tflops", "t": 1712255562.8715458}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255561.9261262}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255562.431604}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.695410765420805, "units": "Tflops", "t": 1712255564.2729542}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255562.9371133}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255563.4424784}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255563.9479754}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.704676199861359, "units": "Tflops", "t": 1712255565.6734145}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255564.4534698}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255564.958886}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255565.4643083}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.63849032320296, "units": "Tflops", "t": 1712255567.079912}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255565.9697223}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255566.4753246}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255566.9807286}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.66426869659584, "units": "Tflops", "t": 1712255568.4841135}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255567.486238}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255567.9918022}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.65046062362572, "units": "Tflops", "t": 1712255569.8893833}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255568.4972694}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255569.0026352}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255569.5080934}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.728750011050055, "units": "Tflops", "t": 1712255571.2878015}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255570.0136085}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255570.5189857}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255571.024495}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.889911177988383, "units": "Tflops", "t": 1712255572.6719346}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255571.5303214}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255572.0357168}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255572.5411687}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.826081141049558, "units": "Tflops", "t": 1712255574.0617833}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255573.0466516}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255573.5519965}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255574.0574956}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.867670099370265, "units": "Tflops", "t": 1712255575.4478362}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255574.563711}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255575.069092}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.809582412552908, "units": "Tflops", "t": 1712255576.8389657}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255575.574485}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255576.0798798}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255576.58535}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.826445032484301, "units": "Tflops", "t": 1712255578.228757}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255577.090967}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255577.5963519}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255578.1018555}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.811398244916061, "units": "Tflops", "t": 1712255579.6197503}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255578.6072128}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255579.1125522}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255579.6179092}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.791063089377742, "units": "Tflops", "t": 1712255581.0125449}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255580.123556}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255580.628886}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.829614854308845, "units": "Tflops", "t": 1712255582.4020793}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255581.1342416}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255581.6397645}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255582.1452425}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.774675025991804, "units": "Tflops", "t": 1712255583.796309}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255582.6507223}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255583.1561096}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255583.6615043}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.787978951217191, "units": "Tflops", "t": 1712255585.1893754}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255584.1670303}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255584.6724446}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255585.1779106}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.799299666614093, "units": "Tflops", "t": 1712255586.5815606}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255585.683302}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255586.1888883}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.722232171770562, "units": "Tflops", "t": 1712255587.9804113}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255586.6942391}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255587.1997223}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255587.7052448}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.728216262102675, "units": "Tflops", "t": 1712255589.378773}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255588.2106667}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255588.7160475}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255589.2216003}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.764930720094418, "units": "Tflops", "t": 1712255590.7740073}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255589.7270205}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255590.232445}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255590.7379248}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.768725631060855, "units": "Tflops", "t": 1712255592.1687796}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255591.2433767}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255591.7487893}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.697796230731747, "units": "Tflops", "t": 1712255593.5697937}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255592.254256}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255592.7596364}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255593.2651298}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.70776532604224, "units": "Tflops", "t": 1712255594.969962}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255593.7705636}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255594.2761729}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255594.7815537}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.731671520076944, "units": "Tflops", "t": 1712255596.3681211}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255595.2872436}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255595.7926126}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255596.2981296}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.698122184867804, "units": "Tflops", "t": 1712255597.7691581}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255596.8037198}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255597.309168}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.724963590675703, "units": "Tflops", "t": 1712255599.1677573}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255597.8147457}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255598.3201616}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255598.8257203}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.66051325436683, "units": "Tflops", "t": 1712255600.572145}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255599.3311179}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255599.8365905}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255600.3431256}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.708995965787986, "units": "Tflops", "t": 1712255601.97222}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255600.8485634}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255601.3542163}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255601.8597043}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.734113650298097, "units": "Tflops", "t": 1712255603.3702004}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255602.3651214}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255602.870679}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.705756587202982, "units": "Tflops", "t": 1712255604.7705188}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255603.376286}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255603.8819466}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255604.387317}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.698156918483635, "units": "Tflops", "t": 1712255606.1715412}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255604.892803}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255605.3985481}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255605.9039252}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.699843019570608, "units": "Tflops", "t": 1712255607.5724313}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255606.4093637}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255606.9148562}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255607.4204767}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.672317575761674, "units": "Tflops", "t": 1712255608.9757614}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255607.9259338}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255608.431323}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255608.9368682}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.681217160293855, "units": "Tflops", "t": 1712255610.3784306}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255609.4424253}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255609.9479496}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.642370504279942, "units": "Tflops", "t": 1712255611.784417}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255610.4536211}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255610.9591603}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255611.4646447}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.655818825629723, "units": "Tflops", "t": 1712255613.189246}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255611.9701402}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255612.4756014}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255612.9811418}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.683200959377295, "units": "Tflops", "t": 1712255614.5916069}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255613.486701}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255613.9921622}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255614.4976285}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.657945058422127, "units": "Tflops", "t": 1712255615.9962435}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255615.0029662}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255615.508457}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.65166902010008, "units": "Tflops", "t": 1712255617.4013913}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255616.0139093}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255616.5194578}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255617.024808}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.60774781288782, "units": "Tflops", "t": 1712255618.8105342}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255617.530576}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255618.0361245}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255618.5414977}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.64558643961625, "units": "Tflops", "t": 1712255620.2164268}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255619.0468614}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255619.5523326}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255620.057975}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.61102350877372, "units": "Tflops", "t": 1712255621.6252875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255620.5639396}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255621.0693052}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255621.5747862}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.644455935408924, "units": "Tflops", "t": 1712255623.0311265}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255622.0802267}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255622.585605}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.596623359621637, "units": "Tflops", "t": 1712255624.4412355}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255623.0911338}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255623.5965674}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255624.1019561}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.622343160160543, "units": "Tflops", "t": 1712255625.8490694}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255624.6074588}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255625.1131208}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255625.6184845}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.628513603874548, "units": "Tflops", "t": 1712255627.2563362}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255626.1238346}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255626.6293097}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255627.134831}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.590818027442614, "units": "Tflops", "t": 1712255628.6671333}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255627.6403165}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255628.1459706}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255628.6515367}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.642882524290647, "units": "Tflops", "t": 1712255630.073115}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255629.1569285}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255629.6623812}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.587102985739763, "units": "Tflops", "t": 1712255631.4840946}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255630.1678379}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255630.6733983}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255631.1787448}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.616666740355537, "units": "Tflops", "t": 1712255632.8924332}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255631.6843183}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255632.1897936}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255632.695194}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.569839445954809, "units": "Tflops", "t": 1712255634.305002}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255633.2006707}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255633.7061725}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255634.2118294}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.581378439159183, "units": "Tflops", "t": 1712255635.7165217}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255634.7171934}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255635.2227345}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.595354886713398, "units": "Tflops", "t": 1712255637.1267536}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255635.7282622}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255636.233739}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255636.7392259}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.557212722859132, "units": "Tflops", "t": 1712255638.5404723}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255637.2448416}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255637.7503226}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255638.2557552}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.593364248620146, "units": "Tflops", "t": 1712255639.9510727}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255638.761263}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255639.2666855}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255639.772051}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.590235623738554, "units": "Tflops", "t": 1712255641.3617966}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255640.2774434}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255640.7829378}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255641.2884593}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.563759884065002, "units": "Tflops", "t": 1712255642.7749298}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255641.7938502}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255642.2992673}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.586407600234109, "units": "Tflops", "t": 1712255644.1859736}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255642.8048894}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255643.310384}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255643.815686}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.561344091010326, "units": "Tflops", "t": 1712255645.599321}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255644.3212526}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255644.8265646}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255645.3320694}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.549897659154153, "units": "Tflops", "t": 1712255647.0137086}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255645.8372893}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255646.3428822}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255646.8483977}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.577367969870629, "units": "Tflops", "t": 1712255648.4255903}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255647.3539088}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255647.859561}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255648.3650525}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.545722582078248, "units": "Tflops", "t": 1712255649.840362}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255648.8704834}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255649.3758738}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.525782375547141, "units": "Tflops", "t": 1712255651.2570674}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255649.8814263}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255650.3868208}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255650.8942258}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.552527562096355, "units": "Tflops", "t": 1712255652.6712215}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255651.4004166}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255651.9060063}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255652.4114156}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.569077269937086, "units": "Tflops", "t": 1712255654.0838726}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255652.917131}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255653.4225712}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255653.9280612}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.530807087566235, "units": "Tflops", "t": 1712255655.4999828}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255654.4334183}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255654.9390645}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255655.4446144}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.546304285106634, "units": "Tflops", "t": 1712255656.914702}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255655.9500153}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255656.4555209}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.543417160458414, "units": "Tflops", "t": 1712255658.3296328}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255656.9609184}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255657.4663062}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255657.971797}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.550796917709162, "units": "Tflops", "t": 1712255659.7439384}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255658.477359}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255658.9828982}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255659.48829}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.545793327418611, "units": "Tflops", "t": 1712255661.158698}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255659.9937115}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255660.4992616}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255661.0046268}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.491811215556384, "units": "Tflops", "t": 1712255662.5783842}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255661.510044}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255662.015635}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255662.521077}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.539125134956947, "units": "Tflops", "t": 1712255663.993747}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255663.02651}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255663.531993}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.527653844517225, "units": "Tflops", "t": 1712255665.4101334}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255664.0374837}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255664.5430436}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255665.0484688}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.525889528493186, "units": "Tflops", "t": 1712255666.8268692}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255665.5540493}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255666.059478}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255666.5648825}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.526762488085254, "units": "Tflops", "t": 1712255668.2433631}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255667.070284}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255667.5757911}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255668.081366}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.535416367659906, "units": "Tflops", "t": 1712255669.6590788}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255668.586842}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255669.0922627}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255669.5983434}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.512345106668553, "units": "Tflops", "t": 1712255671.0768833}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255670.1037605}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255670.609235}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.522440468893725, "units": "Tflops", "t": 1712255672.4937396}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255671.1146243}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255671.6200562}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255672.1254392}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.506980359493857, "units": "Tflops", "t": 1712255673.9120388}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255672.6307805}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255673.1362188}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255673.6417248}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.500850784497917, "units": "Tflops", "t": 1712255675.3309033}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255674.1472812}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255674.652726}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255675.1582773}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712255676.042694, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp32.D1.data
new file mode 100644
index 000000000..9c68189e0
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp32.D1.data
@@ -0,0 +1,342 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32"}, "weight": 0.0, "name": "fp32", "tag": ["fp32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255547.659288, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712255547.66849}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 15.927861143345272, "units": "Tflops", "t": 1712255550.325242}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2410.4375, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255548.8028693}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.1, "temperature": null, "power": null}}, "t": 1712255549.3089736}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.23, "temperature": null, "power": null}}, "t": 1712255549.8144863}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.43, "temperature": null, "power": null}}, "t": 1712255550.3201358}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.96590161097079, "units": "Tflops", "t": 1712255551.7033868}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.56, "temperature": null, "power": null}}, "t": 1712255550.825632}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.76, "temperature": null, "power": null}}, "t": 1712255551.33129}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.920836496091372, "units": "Tflops", "t": 1712255553.0853846}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}, "t": 1712255551.8366325}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255552.3419173}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255552.8474307}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.953537928996345, "units": "Tflops", "t": 1712255554.4643826}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255553.35291}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255553.8586426}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255554.3643098}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 16.086912982354253, "units": "Tflops", "t": 1712255555.8316953}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255554.8700383}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255555.375619}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 16.098542980668697, "units": "Tflops", "t": 1712255557.1980338}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255555.8811498}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255556.3867881}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255556.8923848}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 16.065952992138705, "units": "Tflops", "t": 1712255558.5672188}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255557.3979971}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255557.9035702}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255558.4092448}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 16.05193641729031, "units": "Tflops", "t": 1712255559.93773}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255558.9150112}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255559.420736}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255559.9263666}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 16.06166965119002, "units": "Tflops", "t": 1712255561.3074536}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255560.4320366}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255560.9377532}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 16.000539928098103, "units": "Tflops", "t": 1712255562.6822336}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255561.4433444}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255561.949639}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255562.4553719}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.988740616948034, "units": "Tflops", "t": 1712255564.0582309}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255562.9609451}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255563.4666877}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255563.9725835}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 16.020016026065175, "units": "Tflops", "t": 1712255565.4313016}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255564.4781733}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255564.9836419}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 16.00336335878148, "units": "Tflops", "t": 1712255566.8057618}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255565.4890256}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255565.9944954}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255566.4998224}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.99346213511091, "units": "Tflops", "t": 1712255568.1809568}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255567.005266}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255567.5105267}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255568.0159228}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.960699184458605, "units": "Tflops", "t": 1712255569.558959}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255568.521181}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255569.0265794}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255569.5319989}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.960837282608383, "units": "Tflops", "t": 1712255570.936968}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255570.0372443}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255570.5424907}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.949021969708241, "units": "Tflops", "t": 1712255572.3159585}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255571.0477796}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255571.5530505}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255572.0583904}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.938777443046229, "units": "Tflops", "t": 1712255573.6958804}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255572.563781}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255573.069115}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255573.574452}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.955246218379944, "units": "Tflops", "t": 1712255575.0743537}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255574.0798788}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255574.5851853}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.896782521129694, "units": "Tflops", "t": 1712255576.4578922}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255575.0904922}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255575.5957592}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255576.1011443}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.899621525413934, "units": "Tflops", "t": 1712255577.84118}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255576.6064155}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255577.111898}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255577.6171765}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.889804416430762, "units": "Tflops", "t": 1712255579.2253482}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255578.1224818}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255578.627782}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255579.133088}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.924786582030451, "units": "Tflops", "t": 1712255580.606949}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255579.6383667}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255580.1438332}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.881806855633734, "units": "Tflops", "t": 1712255581.9921534}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255580.6491568}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255581.1544294}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255581.659799}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.911273150425377, "units": "Tflops", "t": 1712255583.3747807}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255582.165105}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255582.6704822}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255583.1757798}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.903654325186311, "units": "Tflops", "t": 1712255584.7578363}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255583.6810932}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255584.1863942}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255584.6917443}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.87575453155035, "units": "Tflops", "t": 1712255586.1432197}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255585.1970885}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255585.7025359}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.893670646825067, "units": "Tflops", "t": 1712255587.5270286}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255586.2079563}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255586.7132492}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255587.2186575}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.898010074231, "units": "Tflops", "t": 1712255588.910492}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255587.7239938}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255588.2294085}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255588.7349155}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.889147453807437, "units": "Tflops", "t": 1712255590.2950273}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255589.24036}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255589.7459192}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255590.2512972}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.85125076302189, "units": "Tflops", "t": 1712255591.6826117}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255590.756617}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255591.261997}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.845698086008236, "units": "Tflops", "t": 1712255593.070604}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255591.767399}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255592.2727113}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255592.7780836}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.83433255195741, "units": "Tflops", "t": 1712255594.4596043}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255593.2834687}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255593.7888258}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255594.2941942}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.837062273657065, "units": "Tflops", "t": 1712255595.848394}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255594.7994752}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255595.304846}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255595.8101795}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.810579713622479, "units": "Tflops", "t": 1712255597.239484}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255596.315452}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255596.820816}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.830242450310054, "units": "Tflops", "t": 1712255598.6288505}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255597.3261344}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255597.831435}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255598.3369863}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.77574617801189, "units": "Tflops", "t": 1712255600.02301}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255598.8425097}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255599.3479187}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255599.853252}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.824544291675224, "units": "Tflops", "t": 1712255601.4128883}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255600.3586054}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255600.864059}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255601.369384}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.780275223819668, "units": "Tflops", "t": 1712255602.8066514}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255601.8748083}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255602.3801239}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.82332262793939, "units": "Tflops", "t": 1712255604.1966085}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255602.8854797}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255603.3907616}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255603.896141}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.801102309603413, "units": "Tflops", "t": 1712255605.5885296}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255604.4013927}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255604.9066634}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255605.4120035}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.780839512366876, "units": "Tflops", "t": 1712255606.982265}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255605.9172597}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255606.422697}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255606.927993}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.81647123407189, "units": "Tflops", "t": 1712255608.3728466}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255607.433484}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255607.9389668}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.787997868637406, "units": "Tflops", "t": 1712255609.7658947}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255608.4449701}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255608.950273}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255609.4555984}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.824720769806998, "units": "Tflops", "t": 1712255611.1557574}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255609.961074}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255610.466375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255610.9719179}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.84609827149076, "units": "Tflops", "t": 1712255612.5437315}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255611.4772956}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255611.9825716}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255612.487816}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.827596560411902, "units": "Tflops", "t": 1712255613.933693}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255612.9932127}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255613.4986148}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.81486032259912, "units": "Tflops", "t": 1712255615.3245184}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255614.003885}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255614.5092156}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255615.0145226}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.825766144066446, "units": "Tflops", "t": 1712255616.714376}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255615.5199094}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255616.0251594}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255616.5305326}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.841770807445616, "units": "Tflops", "t": 1712255618.102733}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255617.0358903}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255617.5412104}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255618.0464497}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.797297212290763, "units": "Tflops", "t": 1712255619.4950016}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255618.551813}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255619.057385}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.811921390430832, "units": "Tflops", "t": 1712255620.8859334}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255619.5635452}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255620.0690506}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255620.574435}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.805101530966033, "units": "Tflops", "t": 1712255622.277515}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255621.0797808}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255621.585039}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255622.0903418}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.780234726193733, "units": "Tflops", "t": 1712255623.6712992}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255622.595719}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255623.1009667}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255623.6062264}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.812458124914539, "units": "Tflops", "t": 1712255625.0622237}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255624.1115322}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255624.6170847}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.762621785574742, "units": "Tflops", "t": 1712255626.4575264}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255625.12236}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255625.6277254}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255626.1331651}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.800769358082206, "units": "Tflops", "t": 1712255627.8494701}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255626.638466}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255627.1438243}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255627.6491601}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.772460334203938, "units": "Tflops", "t": 1712255629.2439322}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255628.1547961}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255628.6601436}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255629.1656568}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.7827189652354, "units": "Tflops", "t": 1712255630.6374855}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255629.6711864}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255630.1766105}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.762551746790068, "units": "Tflops", "t": 1712255632.0327938}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255630.6818712}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255631.1871288}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255631.6924818}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.744620768225206, "units": "Tflops", "t": 1712255633.4297032}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255632.1978962}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255632.7032506}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255633.20859}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.7515015014894, "units": "Tflops", "t": 1712255634.826023}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255633.71408}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255634.2194915}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255634.7248755}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.76974744263548, "units": "Tflops", "t": 1712255636.2210338}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255635.2301898}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255635.7355587}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.76835899744905, "units": "Tflops", "t": 1712255637.6158917}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255636.2409394}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255636.74635}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255637.2517416}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.7330669302239, "units": "Tflops", "t": 1712255639.013847}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255637.75715}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255638.2628024}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255638.7683449}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.790625127832094, "units": "Tflops", "t": 1712255640.406692}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255639.2736256}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255639.7790394}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255640.2844267}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.74997372296358, "units": "Tflops", "t": 1712255641.8031268}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255640.7900374}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255641.2954571}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255641.8009105}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.77880394348938, "units": "Tflops", "t": 1712255643.197019}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255642.306196}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255642.8116083}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.736752539702902, "units": "Tflops", "t": 1712255644.594863}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255643.316945}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255643.8224695}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255644.327852}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.716386533654639, "units": "Tflops", "t": 1712255645.9947503}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255644.8331714}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255645.3386047}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255645.8440557}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.71156755506454, "units": "Tflops", "t": 1712255647.3949244}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255646.3494732}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255646.8565965}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255647.3619893}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.721468402034684, "units": "Tflops", "t": 1712255648.7942026}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255647.8682337}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255648.3735483}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.711377533712646, "units": "Tflops", "t": 1712255650.194333}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255648.8789198}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255649.3842778}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255649.8896177}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.734022392237087, "units": "Tflops", "t": 1712255651.592633}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255650.3962462}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255650.9017773}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255651.4073474}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.703523772273826, "units": "Tflops", "t": 1712255652.993522}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255651.9128258}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255652.4182656}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255652.9272428}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.715805422313117, "units": "Tflops", "t": 1712255654.3933566}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255653.4329352}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255653.9384727}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.716723972864612, "units": "Tflops", "t": 1712255655.7931292}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255654.443861}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255654.949182}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255655.4546654}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.711966347212744, "units": "Tflops", "t": 1712255657.1933308}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255655.9600194}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255656.4652593}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255656.9706042}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.713899036188513, "units": "Tflops", "t": 1712255658.5932384}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255657.4759762}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255657.9811897}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255658.4867256}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.726347085312163, "units": "Tflops", "t": 1712255659.9921033}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255658.9919832}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255659.497483}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.72239833501485, "units": "Tflops", "t": 1712255661.3913586}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255660.002859}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255660.5081415}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255661.0134037}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.722961172099689, "units": "Tflops", "t": 1712255662.790462}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255661.5186653}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255662.0240808}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255662.5293708}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.698677941166077, "units": "Tflops", "t": 1712255664.1917977}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255663.0347106}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255663.5401568}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255664.0454066}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.699166932373712, "units": "Tflops", "t": 1712255665.5930223}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255664.5507834}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255665.0564866}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255665.561961}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.726698360101759, "units": "Tflops", "t": 1712255666.991662}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255666.0672402}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255666.5725327}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.735077292663831, "units": "Tflops", "t": 1712255668.3894024}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255667.0779963}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255667.58336}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255668.0886757}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.704240343090143, "units": "Tflops", "t": 1712255669.7899332}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255668.5942047}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255669.0996523}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255669.6050286}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.698303870097522, "units": "Tflops", "t": 1712255671.1909719}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255670.1102896}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255670.615615}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255671.121193}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.719726748644288, "units": "Tflops", "t": 1712255672.5901182}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255671.6265645}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255672.1320064}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.703232349864692, "units": "Tflops", "t": 1712255673.9906921}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255672.6373641}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255673.1428497}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255673.6485028}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712255674.5593328, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/llama.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/llama.D0.data
new file mode 100644
index 000000000..772729c25
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/llama.D0.data
@@ -0,0 +1,33 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/llm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama", "plan": {"method": "per_gpu"}, "tags": ["llm", "nlp"], "weight": 1.0, "name": "llama", "tag": ["llama", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255328.306572, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712255330.5847871}, "pipe": null}
+{"event": "line", "data": "Dataset\n", "pipe": "stderr"}
+{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.\n", "pipe": "stderr"}
+{"event": "line", "data": " table = cls._concat_blocks(blocks, axis=0)\n", "pipe": "stderr"}
+{"event": "line", "data": "Tokenizer\n", "pipe": "stderr"}
+{"event": "line", "data": "Model\n", "pipe": "stderr"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 231, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 227, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " return huggingface_main(args, model, config)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 143, in huggingface_main\n", "pipe": "stderr"}
+{"event": "line", "data": " model = LlamaForCausalLM(LlamaConfig.from_dict(config)).cuda()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/modeling_utils.py\", line 2243, in cuda\n", "pipe": "stderr"}
+{"event": "line", "data": " return super().cuda(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in cuda\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"}
+{"event": "line", "data": " [Previous line repeated 2 more times]\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 833, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " param_applied = fn(param)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 172.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 150.19 MiB is free. Including non-PyTorch memory, this process has 21.43 GiB memory in use. Of the allocated memory 21.20 GiB is allocated by PyTorch, and 9.14 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712255394.1711965, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/llama.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/llama.D1.data
new file mode 100644
index 000000000..68b4c69a3
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/llama.D1.data
@@ -0,0 +1,34 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/llm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama", "plan": {"method": "per_gpu"}, "tags": ["llm", "nlp"], "weight": 1.0, "name": "llama", "tag": ["llama", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255330.576584, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712255330.585346}, "pipe": null}
+{"event": "line", "data": "Dataset\n", "pipe": "stderr"}
+{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.\n", "pipe": "stderr"}
+{"event": "line", "data": " table = cls._concat_blocks(blocks, axis=0)\n", "pipe": "stderr"}
+{"event": "line", "data": "Tokenizer\n", "pipe": "stderr"}
+{"event": "line", "data": "Model\n", "pipe": "stderr"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 231, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 227, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " return huggingface_main(args, model, config)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 143, in huggingface_main\n", "pipe": "stderr"}
+{"event": "line", "data": " model = LlamaForCausalLM(LlamaConfig.from_dict(config)).cuda()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/modeling_utils.py\", line 2243, in cuda\n", "pipe": "stderr"}
+{"event": "line", "data": " return super().cuda(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in cuda\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"}
+{"event": "line", "data": " [Previous line repeated 2 more times]\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 833, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " param_applied = fn(param)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.", "pipe": "stderr"}
+{"event": "line", "data": "OutOfMemoryError: CUDA out of memory. Tried to allocate 172.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 150.19 MiB is free. Including non-PyTorch memory, this process has 21.43 GiB memory in use. Of the allocated memory 21.20 GiB is allocated by PyTorch, and 9.14 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712255393.429458, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-1_3b-multinode.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-1_3b-multinode.data
new file mode 100644
index 000000000..3b45f0015
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-1_3b-multinode.data
@@ -0,0 +1 @@
+{"event": "message", "data": {"message": "Skip opt-1_3b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-1_3b.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-1_3b.data
new file mode 100644
index 000000000..3e2c6cea0
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-1_3b.data
@@ -0,0 +1 @@
+{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-6_7b-multinode.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-6_7b-multinode.data
new file mode 100644
index 000000000..cccd5c098
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-6_7b-multinode.data
@@ -0,0 +1 @@
+{"event": "message", "data": {"message": "Skip opt-6_7b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-6_7b.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-6_7b.data
new file mode 100644
index 000000000..3e2c6cea0
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-6_7b.data
@@ -0,0 +1 @@
+{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/reformer.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/reformer.D0.data
new file mode 100644
index 000000000..cb304d036
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/reformer.D0.data
@@ -0,0 +1,47 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Reformer", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 1.0, "name": "reformer", "tag": ["reformer", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255852.033217, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712255854.4108155}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "line", "data": "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23555.8125, 24512.0], "load": 0.43, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 956.19 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/function.py\", line 288, in apply\n", "pipe": "stderr"}
+{"event": "line", "data": " return user_fn(self, *args)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1677, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " output = layer.backward_pass(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1564, in backward_pass\n", "pipe": "stderr"}
+{"event": "line", "data": " output.backward(grad_attn_output, retain_graph=True)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 956.19 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712255858.6136966, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/reformer.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/reformer.D1.data
new file mode 100644
index 000000000..c491ddda2
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/reformer.D1.data
@@ -0,0 +1,47 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Reformer", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 1.0, "name": "reformer", "tag": ["reformer", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255854.393929, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712255854.418388}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "line", "data": "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23555.8125, 24512.0], "load": 0.44, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 956.19 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/function.py\", line 288, in apply\n", "pipe": "stderr"}
+{"event": "line", "data": " return user_fn(self, *args)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1677, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " output = layer.backward_pass(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1564, in backward_pass\n", "pipe": "stderr"}
+{"event": "line", "data": " output.backward(grad_attn_output, retain_graph=True)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 956.19 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712255858.685499, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/regnet_y_128gf.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/regnet_y_128gf.D0.data
new file mode 100644
index 000000000..e8d73d861
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/regnet_y_128gf.D0.data
@@ -0,0 +1,77 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "regnet_y_128gf", "--batch-size": 64}, "tags": ["classification", "convnet", "lstm", "resnet", "vision"], "weight": 2.0, "name": "regnet_y_128gf", "tag": ["regnet_y_128gf", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255794.463088, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712255796.8223135}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24379.8125, 24512.0], "load": 0.08, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24449.8125, 24512.0], "load": 0.08, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 62.19 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 378, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.trunk_output(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 147, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = x + self.f(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 62.19 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712255805.1206453, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/regnet_y_128gf.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/regnet_y_128gf.D1.data
new file mode 100644
index 000000000..4db96aece
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/regnet_y_128gf.D1.data
@@ -0,0 +1,77 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "regnet_y_128gf", "--batch-size": 64}, "tags": ["classification", "convnet", "lstm", "resnet", "vision"], "weight": 2.0, "name": "regnet_y_128gf", "tag": ["regnet_y_128gf", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255796.804558, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712255796.8299522}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24379.8125, 24512.0], "load": 0.08, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24449.8125, 24512.0], "load": 0.08, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 62.19 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 378, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.trunk_output(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 147, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = x + self.f(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 62.19 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712255805.409879, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152-multi.0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152-multi.0.data
new file mode 100644
index 000000000..db2f3df08
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152-multi.0.data
@@ -0,0 +1,184 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "multigpu", "resnet", "vision"], "weight": 5.0, "name": "resnet152-multi", "tag": ["resnet152-multi", "0"], "job-number": 0, "devices": ["0", "1"]}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255890.820334, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712255890.8366482}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 1, total 2, device cuda:1.\n", "pipe": "stderr"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 0, total 2, device cuda:0.\n", "pipe": "stderr"}
+{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.2) calculated from base learning rate (0.1) and global batch size (512) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch DistributedDataParallel.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 98.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 98.25 MiB is free. Including non-PyTorch memory, this process has 21.49 GiB memory in use. Of the allocated memory 20.87 GiB is allocated by PyTorch, and 237.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [15015.75, 24512.0], "load": 0.25, "temperature": null, "power": null}, "1": {"memory": [14779.75, 24512.0], "load": 0.26, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"}
+{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"}
+{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 485, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.conv3(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 98.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 98.25 MiB is free. Including non-PyTorch memory, this process has 21.49 GiB memory in use. Of the allocated memory 20.87 GiB is allocated by PyTorch, and 237.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24455.75, 24512.0], "load": 0.32, "temperature": null, "power": null}, "1": {"memory": [24413.75, 24512.0], "load": 0.32, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 56.25 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.82 GiB is allocated by PyTorch, and 328.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"}
+{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"}
+{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 479, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.conv2(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 56.25 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.82 GiB is allocated by PyTorch, and 328.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "line", "data": "[2024-04-04 18:38:21,826] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: 1) local_rank: 0 (pid: 33457) of binary: /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/python\n", "pipe": "stderr"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/torchrun\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py\", line 346, in wrapper\n", "pipe": "stderr"}
+{"event": "line", "data": " return f(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 806, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " run(args)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 797, in run\n", "pipe": "stderr"}
+{"event": "line", "data": " elastic_launch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 134, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " return launch_agent(self._config, self._entrypoint, list(args))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 264, in launch_agent\n", "pipe": "stderr"}
+{"event": "line", "data": " raise ChildFailedError(\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.distributed.elastic.multiprocessing.errors.ChildFailedError: \n", "pipe": "stderr"}
+{"event": "line", "data": "============================================================\n", "pipe": "stderr"}
+{"event": "line", "data": "voir FAILED\n", "pipe": "stderr"}
+{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"}
+{"event": "line", "data": "Failures:\n", "pipe": "stderr"}
+{"event": "line", "data": "[1]:\n", "pipe": "stderr"}
+{"event": "line", "data": " time : 2024-04-04_18:38:21\n", "pipe": "stderr"}
+{"event": "line", "data": " host : decentoriole.internal.cloudapp.net\n", "pipe": "stderr"}
+{"event": "line", "data": " rank : 1 (local_rank: 1)\n", "pipe": "stderr"}
+{"event": "line", "data": " exitcode : 1 (pid: 33458)\n", "pipe": "stderr"}
+{"event": "line", "data": " error_file: \n", "pipe": "stderr"}
+{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"}
+{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"}
+{"event": "line", "data": "Root Cause (first observed failure):\n", "pipe": "stderr"}
+{"event": "line", "data": "[0]:\n", "pipe": "stderr"}
+{"event": "line", "data": " time : 2024-04-04_18:38:21\n", "pipe": "stderr"}
+{"event": "line", "data": " host : decentoriole.internal.cloudapp.net\n", "pipe": "stderr"}
+{"event": "line", "data": " rank : 0 (local_rank: 0)\n", "pipe": "stderr"}
+{"event": "line", "data": " exitcode : 1 (pid: 33457)\n", "pipe": "stderr"}
+{"event": "line", "data": " error_file: \n", "pipe": "stderr"}
+{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"}
+{"event": "line", "data": "============================================================\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712255902.1882682, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D0.data
new file mode 100644
index 000000000..6debcec37
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D0.data
@@ -0,0 +1,69 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet152", "tag": ["resnet152", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255878.736862, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712255881.0318313}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24389.8125, 24512.0], "load": 0.32, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 98.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 122.19 MiB is free. Including non-PyTorch memory, this process has 21.46 GiB memory in use. Of the allocated memory 20.92 GiB is allocated by PyTorch, and 248.63 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 485, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.conv3(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 98.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 122.19 MiB is free. Including non-PyTorch memory, this process has 21.46 GiB memory in use. Of the allocated memory 20.92 GiB is allocated by PyTorch, and 248.63 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712255888.4096215, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D1.data
new file mode 100644
index 000000000..86517af01
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D1.data
@@ -0,0 +1,69 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet152", "tag": ["resnet152", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255881.015572, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D1", "--checkpoint-hist", "1"], "time": 1712255881.0389137}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24451.8125, 24512.0], "load": 0.3, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 60.19 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.87 GiB is allocated by PyTorch, and 359.13 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 479, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.conv2(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 60.19 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.87 GiB is allocated by PyTorch, and 359.13 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D1", "--checkpoint-hist", "1"], "time": 1712255888.4892821, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet50.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet50.D0.data
new file mode 100644
index 000000000..e939444f3
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet50.D0.data
@@ -0,0 +1,1317 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "resnet50", "--batch-size": 64}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet50", "tag": ["resnet50", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255678.375334, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712255680.699123}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01910400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.14129638671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.14862060546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10113525390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0660400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.13128662109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06024169921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08062744140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1383056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08367919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.15252685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1346435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.12567138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07647705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.16888427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1417236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.5, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10540771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03192138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.125244140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0218505859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97784423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0223388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9576416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.026123046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.117919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06207275390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98907470703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05682373046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1102294921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.992431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0548095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03485107421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1063232421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9847412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03143310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95245361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90667724609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94012451171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.148681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05609130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05523681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06109619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06329345703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07000732421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0882568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03497314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00689697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98052978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08697509765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1002197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 572.1126740233907, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.09124755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03094482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93170166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95501708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9737548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0040283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10992431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 580.348844062986, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88623046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84197998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.776611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.77, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86297607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97552490234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 405.8079451227742, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88336181640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8970947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9532470703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8927001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97161865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8377685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93768310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8402099609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97271728515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 581.2733954821643, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97845458984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7388916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.984619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00238037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.008056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.915283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96917724609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 575.9091595446283, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.09857177734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.979736328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95098876953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92535400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9305419921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 575.7359657588951, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01165771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94598388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99298095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0369873046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89678955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92120361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0009765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93212890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99786376953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 575.0024466553757, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.897705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92364501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93316650390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04547119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.989013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9361572265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99517822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 574.6046994749636, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93121337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97625732421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.028564453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9761962890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95465087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.15826416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96490478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 576.5207671733423, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00274658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0006103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9759521484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.729736328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7237548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8052978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 435.56210754897063, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8165283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88201904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.858642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78741455078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9552001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99932861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91802978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 573.2884214406574, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99627685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8643798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.817138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00042724609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88897705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91864013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92852783203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87933349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87811279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 573.4048014061, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84014892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9122314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96112060546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.945068359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96295166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03204345703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9461669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9356689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 575.6347367341526, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91021728515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9205322265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82989501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96795654296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9881591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 565.461491080691, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93280029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07684326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96673583984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9493408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.936767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9932861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97216796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89306640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 567.2251321424063, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0050048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.931640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0535888671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97381591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9410400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0421142578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98626708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 570.0599739315137, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83575439453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04046630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95758056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0806884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95721435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89801025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00994873046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96990966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81915283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 574.2348067977822, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8685302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82470703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86651611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86968994140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.77001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7615966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 451.5874267754516, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9073486328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9671630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7904052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8543701171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 576.412743803677, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86492919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95697021484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83990478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94183349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8853759765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95355224609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93658447265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01934814453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 561.3534957865011, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89849853515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90081787109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00103759765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87249755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.893310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92474365234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 569.7193579652935, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8936767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0164794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97174072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02459716796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89459228515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00933837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96136474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 574.0104922620504, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.951904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0091552734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97320556640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938720703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.035888671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7872314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9674072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9561767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98101806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 571.3174286509386, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9078369140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95440673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86907958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8763427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85687255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98529052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01007080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0015869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98931884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 567.9075860864638, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07196044921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90740966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [4, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.76171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8758544921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.757080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 426.0030863923609, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.74407958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8489990234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.886962890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85711669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87554931640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87847900390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 572.0116942229658, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.990478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.844482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93585205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79632568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83502197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90557861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9254150390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 569.2570534284409, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89666748046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03643798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79534912109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99017333984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97515869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9012451171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.984130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95159912109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94427490234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 574.260241844676, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93963623046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98297119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9666748046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85235595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92718505859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0394287109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.895751953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81536865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 565.6000572277281, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92144775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.017822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.886474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.11016845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90948486328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01763916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9462890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9893798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 575.9483316751655, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02923583984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93634033203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.954833984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00177001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86456298828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01214599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9852294921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 570.0673992170645, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99310302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93255615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0482177734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00762939453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.12774658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [5, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7882080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 577.5541043660687, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.812255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.872802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78887939453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85943603515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79376220703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 456.59009531270084, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8995361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96575927734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91510009765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91705322265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92816162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7269287109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8709716796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94378662109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 574.1525470527383, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8624267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.988037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87945556640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.883056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06353759765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88275146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8873291015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94964599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02496337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 561.9250919141896, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9150390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96051025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96856689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9273681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83465576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 571.3177318211139, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.899169921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8436279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99530029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96063232421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83734130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.919189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.947998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04193115234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 573.5795939165216, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92437744140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92291259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02398681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.950439453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92779541015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9212646484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9705810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 573.0953823927051, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.884033203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0501708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01141357421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9388427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86895751953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.13372802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9693603515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94915771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 578.6938484520202, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [6, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8272705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.77197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.74981689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8818359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85467529296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8621826171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 429.92751688530814, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88482666015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938232421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9154052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8677978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85736083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9222412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8905029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8768310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87493896484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 566.540881050484, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93951416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86285400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9405517578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89117431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9681396484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93115234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8900146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 558.7132570629016, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9620361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8468017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08172607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8963623046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97369384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96624755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01641845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91741943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 569.3163028924275, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93377685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8817138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91033935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.803466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9432373046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9241943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02960205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86810302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 559.0276765625225, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04705810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9481201171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89422607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.932861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92510986328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86151123046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88433837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 561.8765233730917, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05517578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.895263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9244384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86724853515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.18231201171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.015869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9686279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9774169921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 569.8678943525947, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0140380859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99749755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94049072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [7, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.904052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78289794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 425.6825269757262, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8101806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.73468017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92413330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88800048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92901611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.814208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79412841796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9886474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.76678466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 544.5606316444989, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8865966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.877197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80401611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.832763671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.725341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84552001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8594970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97454833984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 558.6010243107713, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89898681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.863525390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92596435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96124267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90216064453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8924560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.928955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 574.3507340622675, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99713134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08917236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83929443359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88873291015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00555419921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.73919677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0775146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86798095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 571.5214844927647, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9541015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96783447265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95538330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99176025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9661865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.020263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.972412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 566.4233869041755, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9315185546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01971435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96453857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96722412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9183349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95294189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8988037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.09783935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 570.3860957741948, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9989013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01422119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86346435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93499755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81903076171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0323486328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [8, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 579.3909853972872, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7816162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88311767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9239501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81005859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7730712890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 417.7012759244583, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80145263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0303955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87921142578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9085693359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.73345947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 552.4939159557017, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80206298828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8074951171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9952392578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94091796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88726806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 550.6372648864318, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86572265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96881103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88702392578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9696044921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92523193359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9544677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94122314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9337158203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 561.7081264832592, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0208740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03631591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97393798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96246337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0494384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81549072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8983154296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9752197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8819580078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 555.5399100985388, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91192626953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.965576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02691650390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95599365234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8226318359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9971923828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 560.3661852150547, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90887451171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9691162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9595947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9736328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.911865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85284423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10369873046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8717041015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.015380859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 563.5735283189201, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712255750.8894126, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet50.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet50.D1.data
new file mode 100644
index 000000000..987a3fb7b
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet50.D1.data
@@ -0,0 +1,1311 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "resnet50", "--batch-size": 64}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet50", "tag": ["resnet50", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255680.681726, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712255680.706513}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01910400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.14129638671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.14862060546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10113525390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0660400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.13128662109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06024169921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08062744140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1383056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08367919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.15252685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1346435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.12567138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07647705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.16888427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1417236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.49, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10540771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03192138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.125244140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0218505859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97784423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0223388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9576416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.026123046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.117919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06207275390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98907470703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05682373046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1102294921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.992431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0548095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03485107421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1063232421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9847412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03143310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95245361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90667724609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94012451171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.148681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05609130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05523681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06109619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06329345703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07000732421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0882568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03497314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00689697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98052978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08697509765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1002197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 570.1563523729465, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.09124755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03094482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93170166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95501708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9737548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0040283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10992431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 582.8048916927227, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88623046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84197998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.776611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86297607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97552490234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 433.1616331155187, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88336181640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8970947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9532470703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8927001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97161865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8377685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93768310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8402099609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 569.7099898121512, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97271728515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97845458984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7388916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.984619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00238037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.008056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.915283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 552.8042857349994, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96917724609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.09857177734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.979736328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95098876953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92535400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 566.87817229171, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9305419921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01165771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94598388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99298095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0369873046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89678955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92120361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0009765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93212890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 565.5704199506474, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99786376953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.897705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92364501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93316650390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04547119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.989013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9361572265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 558.5055616778927, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99517822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93121337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97625732421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.028564453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9761962890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95465087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.15826416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 569.1175752260376, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96490478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00274658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0006103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9759521484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.729736328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 571.5084703538287, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7237548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8052978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8165283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88201904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.858642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78741455078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9552001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99932861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 484.1458238009069, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91802978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99627685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8643798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.817138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00042724609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.77, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88897705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91864013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92852783203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 570.2715097705955, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87933349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87811279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84014892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9122314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96112060546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.945068359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96295166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03204345703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9461669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 554.6307761393889, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9356689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91021728515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9205322265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82989501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96795654296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 558.7501865738172, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9881591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93280029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07684326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96673583984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9493408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.936767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9932861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97216796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 553.0826912625816, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89306640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0050048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.931640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0535888671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97381591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9410400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0421142578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98626708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 563.6315674306203, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83575439453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04046630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95758056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0806884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95721435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89801025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00994873046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96990966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 571.6441158537912, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81915283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8685302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82470703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86651611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86968994140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.77001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 434.73906882971664, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7615966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9073486328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9671630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 543.6334542072432, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7904052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8543701171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86492919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95697021484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83990478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94183349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8853759765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95355224609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 562.5089556479531, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93658447265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01934814453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89849853515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90081787109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00103759765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87249755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.893310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 567.6088695023132, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92474365234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8936767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0164794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97174072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02459716796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89459228515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00933837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 557.7502916561972, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96136474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.951904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0091552734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97320556640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938720703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.035888671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7872314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9674072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 570.5971779422605, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9561767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98101806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9078369140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95440673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86907958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8763427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85687255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98529052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01007080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 563.1773665273919, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0015869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98931884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07196044921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90740966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [4, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.76171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 432.6962119370114, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8758544921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.757080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.74407958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8489990234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.886962890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85711669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 576.1582136216188, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87554931640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87847900390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.990478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.844482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93585205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79632568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83502197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90557861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 572.6980333200548, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9254150390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89666748046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03643798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79534912109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99017333984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97515869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9012451171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.984130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 570.1022816346485, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95159912109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94427490234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93963623046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98297119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9666748046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85235595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92718505859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0394287109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 557.203147974518, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.895751953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81536865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92144775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.017822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.886474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.11016845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90948486328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01763916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9462890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 579.042804389421, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9893798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02923583984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93634033203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.954833984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00177001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86456298828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01214599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 578.9910544706008, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9852294921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99310302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93255615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0482177734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00762939453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.12774658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [5, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 578.5008305995159, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7882080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.812255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.872802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78887939453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 438.27047199980177, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85943603515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79376220703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8995361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96575927734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91510009765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91705322265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92816162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7269287109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 562.6331424079116, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8709716796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94378662109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8624267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.988037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87945556640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.883056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06353759765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88275146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8873291015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 558.5106511403991, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94964599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02496337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9150390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96051025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96856689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9273681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83465576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 564.6977257925548, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.899169921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8436279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99530029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96063232421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83734130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.919189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 566.0334160618031, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.947998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04193115234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92437744140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92291259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02398681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.950439453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92779541015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9212646484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 556.1042450773652, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9705810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.884033203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0501708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01141357421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9388427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86895751953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.13372802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 576.5106212223228, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9693603515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94915771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [6, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8272705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.77197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.74981689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 444.99324420874984, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8818359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85467529296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8621826171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88482666015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938232421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9154052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8677978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85736083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9222412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 567.2363230338814, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8905029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8768310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87493896484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93951416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86285400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9405517578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89117431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9681396484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 561.3939957153904, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93115234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8900146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9620361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8468017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08172607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8963623046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97369384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96624755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 565.1672533156722, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01641845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91741943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93377685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8817138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91033935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.803466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9432373046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9241943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 564.3863450639242, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02960205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86810302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04705810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9481201171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89422607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.932861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92510986328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 566.5973158437805, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86151123046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88433837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05517578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.895263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9244384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86724853515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.18231201171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.015869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 571.0529856876307, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9686279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9774169921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0140380859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99749755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94049072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [7, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.904052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 570.244768110895, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78289794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8101806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.73468017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92413330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88800048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92901611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.814208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79412841796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 469.0339847047376, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9886474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.76678466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8865966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.877197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80401611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.832763671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.725341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84552001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 573.3676513383086, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8594970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97454833984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89898681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.863525390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92596435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96124267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90216064453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 574.9317342927358, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8924560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.928955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99713134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08917236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83929443359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88873291015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00555419921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.73919677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 573.82030626832, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0775146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86798095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9541015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96783447265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95538330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99176025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9661865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 562.1920114241282, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.020263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.972412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9315185546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01971435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96453857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96722412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9183349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95294189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 571.7613121216148, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8988037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.09783935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9989013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01422119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86346435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93499755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 558.9910597231104, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81903076171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0323486328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [8, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7816162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88311767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9239501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81005859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 440.7401054764529, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7730712890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80145263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0303955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87921142578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 564.337711196518, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9085693359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.73345947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80206298828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8074951171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9952392578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 559.5793015195344, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94091796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88726806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86572265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96881103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88702392578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9696044921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92523193359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9544677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 547.5178738207987, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94122314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9337158203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0208740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03631591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97393798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96246337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0494384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81549072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 551.8487495016799, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8983154296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9752197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8819580078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91192626953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.965576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02691650390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95599365234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 549.6589266638944, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8226318359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9971923828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90887451171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9691162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9595947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9736328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.911865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85284423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 556.5315071993815, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712255751.11649, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/rwkv.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/rwkv.D0.data
new file mode 100644
index 000000000..f67335a79
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/rwkv.D0.data
@@ -0,0 +1,437 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "rwkv", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv", "tags": ["llm", "rnn", "unsupported-rocm"], "plan": {"method": "per_gpu"}, "argv": {"--data_type": "dummy", "--ctx_len": 128, "--epoch_steps": 1000, "--epoch_count": 20, "--epoch_begin": 0, "--epoch_save": 0, "--micro_bsz": 16, "--n_layer": 12, "--n_embd": 768, "--pre_ffn": 0, "--head_qk": 0, "--lr_init": "6e-4", "--lr_final": "1e-5", "--warmup_steps": 0, "--beta1": 0.9, "--beta2": 0.99, "--adam_eps": "1e-8", "--accelerator": "gpu", "--devices": 1, "--precision": "tf32", "--strategy": "ddp_find_unused_parameters_false", "--grad_cp": 0, "--random_seed": 1234, "--enable_progress_bar": "False"}, "weight": 1.0, "name": "rwkv", "tag": ["rwkv", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712256283.646602, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712256285.998636}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "[2024-04-04 18:44:47,872] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", "pipe": "stdout"}
+{"event": "line", "data": "########## work in progress ##########\n", "pipe": "stderr"}
+{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"}
+{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"}
+{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"}
+{"event": "line", "data": "\n", "pipe": "stdout"}
+{"event": "line", "data": "Global seed set to 1234\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "############################################################################\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# RWKV-4 TF32 on 1x1 GPU, bsz 1x1x16=16, ddp_find_unused_parameters_false \n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Data = (dummy), ProjDir = /Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Epoch = 0 to 19 (will continue afterwards), save every 0 epoch\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Each \"epoch\" = 1000 steps, 16000 samples, 2048000 tokens\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Model = 12 n_layer, 768 n_embd, 128 ctx_len\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Adam = lr 0.0006 to 1e-05, warmup 0 steps, beta (0.9, 0.99), eps 1e-08\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Found torch 2.1.0+cu118, recommend 1.13.1+cu117 or newer\n", "pipe": "stderr"}
+{"event": "line", "data": "# Found deepspeed 0.12.2, recommend 0.7.0 (faster than newer versions)\n", "pipe": "stderr"}
+{"event": "line", "data": "# Found pytorch_lightning 1.9.5, recommend 1.9.1 or newer\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "############################################################################\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "{'load_model': '', 'wandb': '', 'proj_dir': '/Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/', 'random_seed': 1234, 'data_file': '', 'data_type': 'dummy', 'vocab_size': 0, 'ctx_len': 128, 'epoch_steps': 1000, 'epoch_count': 20, 'epoch_begin': 0, 'epoch_save': 0, 'micro_bsz': 16, 'n_layer': 12, 'n_embd': 768, 'dim_att': 768, 'dim_ffn': 3072, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 0.0006, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_version': 1, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': False, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'ddp_find_unused_parameters_false', 'sync_batchnorm': False, 'precision': 'tf32', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-04-04-18-44-49', 'betas': (0.9, 0.99), 'real_bsz': 16, 'run_name': '0 ctx128 L12 D768'}\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "Building dummy data...\n", "pipe": "stderr"}
+{"event": "line", "data": "Building token list...\n", "pipe": "stderr"}
+{"event": "line", "data": "Data has 1620950 tokens, 13 vocab size.\n", "pipe": "stderr"}
+{"event": "line", "data": "RWKV_MY_TESTING \n", "pipe": "stdout"}
+{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"}
+{"event": "line", "data": "Loading extension module wkv_128...\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stdout"}
+{"event": "line", "data": "############################################################################\n", "pipe": "stdout"}
+{"event": "line", "data": "#\n", "pipe": "stdout"}
+{"event": "line", "data": "# Init model weight (slow for large models)...\n", "pipe": "stdout"}
+{"event": "line", "data": "#\n", "pipe": "stdout"}
+{"event": "line", "data": "############################################################################\n", "pipe": "stdout"}
+{"event": "line", "data": "\n", "pipe": "stdout"}
+{"event": "line", "data": "13 768 -0.0006 emb.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.0.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.0.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.0.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.0.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.0.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.0.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.1.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.1.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.1.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.1.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.1.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.1.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.2.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.2.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.2.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.2.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.2.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.2.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.3.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.3.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.3.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.3.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.3.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.3.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.4.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.4.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.4.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.4.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.4.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.4.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.5.att.key.weight", "pipe": "stdout"}
+{"event": "line", "data": "\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.5.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.5.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.5.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.5.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.5.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.6.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.6.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.6.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.6.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.6.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.6.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.7.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.7.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.7.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.7.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.7.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.7.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.8.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.8.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.8.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.8.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.8.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.8.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.9.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.9.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.9.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.9.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.9.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.9.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.10.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.10.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.10.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.10.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.10.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.10.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.11.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.11.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.11.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.11.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.11.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.11.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "13 768 0.5 head.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "GPU available: True (cuda), used: True\n", "pipe": "stderr"}
+{"event": "line", "data": "TPU available: False, using: 0 TPU cores\n", "pipe": "stderr"}
+{"event": "line", "data": "IPU available: False, using: 0 IPUs\n", "pipe": "stderr"}
+{"event": "line", "data": "HPU available: False, using: 0 HPUs\n", "pipe": "stderr"}
+{"event": "line", "data": "13 768 emb.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln0.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln0.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.0.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.0.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.0.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.0.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.0.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.0.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.1.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.1.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.1.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.1.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.1.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.1.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.2.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.2.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.2.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.2.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.2.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.2.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.3.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.3.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.3.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.3.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.3.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.3.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.4.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.4.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.4.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.4.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.4.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.4.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.5.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.5.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.5.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.5.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.5.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.5.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.6.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.6.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.6.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.6.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.6.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.6.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.7.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.7.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.7.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.7.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.7.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.7.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.8.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.8.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.8.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.8.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.8.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.8.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.9.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.9.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.9.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.9.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.9.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.9.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.10.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.10.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.10.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.10.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.10.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.10.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.11.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.11.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.11.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.11.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.11.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.11.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 ln_out.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 ln_out.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "13 768 head.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "[rank: 0] Global seed set to 1234\n", "pipe": "stderr"}
+{"event": "line", "data": "Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1\n", "pipe": "stderr"}
+{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"}
+{"event": "line", "data": "distributed_backend=nccl\n", "pipe": "stderr"}
+{"event": "line", "data": "All distributed processes registered. Starting with 1 processes\n", "pipe": "stderr"}
+{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", "pipe": "stderr"}
+{"event": "line", "data": "Installed CUDA version 11.5 does not match the version torch was compiled with 11.8 but since the APIs are compatible, accepting this combination\n", "pipe": "stdout"}
+{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"}
+{"event": "line", "data": "Loading extension module fused_adam...\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2869.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3975.8125, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "ImportError", "message": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/fused_adam/fused_adam.so: cannot open shared object file: No such file or directory"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py\", line 420, in \n", "pipe": "stderr"}
+{"event": "line", "data": " trainer.fit(model, data_loader)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 608, in fit\n", "pipe": "stderr"}
+{"event": "line", "data": " call._call_and_handle_interrupt(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py\", line 36, in _call_and_handle_interrupt\n", "pipe": "stderr"}
+{"event": "line", "data": " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py\", line 88, in launch\n", "pipe": "stderr"}
+{"event": "line", "data": " return function(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 650, in _fit_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(model, ckpt_path=self.ckpt_path)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1093, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " self.strategy.setup(self)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/ddp.py\", line 181, in setup\n", "pipe": "stderr"}
+{"event": "line", "data": " self.setup_optimizers(trainer)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py\", line 142, in setup_optimizers\n", "pipe": "stderr"}
+{"event": "line", "data": " self.optimizers, self.lr_scheduler_configs, self.optimizer_frequencies = _init_optimizers_and_lr_schedulers(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/core/optimizer.py\", line 180, in _init_optimizers_and_lr_schedulers\n", "pipe": "stderr"}
+{"event": "line", "data": " optim_conf = model.trainer._call_lightning_module_hook(\"configure_optimizers\", pl_module=model)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1356, in _call_lightning_module_hook\n", "pipe": "stderr"}
+{"event": "line", "data": " output = fn(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/src/model.py\", line 606, in configure_optimizers\n", "pipe": "stderr"}
+{"event": "line", "data": " return FusedAdam(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py\", line 94, in __init__\n", "pipe": "stderr"}
+{"event": "line", "data": " fused_adam_cuda = FusedAdamBuilder().load()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 452, in load\n", "pipe": "stderr"}
+{"event": "line", "data": " return self.jit_load(verbose)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 501, in jit_load\n", "pipe": "stderr"}
+{"event": "line", "data": " op_module = load(name=self.name,\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1308, in load\n", "pipe": "stderr"}
+{"event": "line", "data": " return _jit_compile(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1736, in _jit_compile\n", "pipe": "stderr"}
+{"event": "line", "data": " return _import_module_from_library(name, build_directory, is_python_module)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 2136, in _import_module_from_library\n", "pipe": "stderr"}
+{"event": "line", "data": " module = importlib.util.module_from_spec(spec)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"\", line 571, in module_from_spec\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"\", line 1176, in create_module\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"\", line 241, in _call_with_frames_removed\n", "pipe": "stderr"}
+{"event": "line", "data": "ImportError: /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/fused_adam/fused_adam.so: cannot open shared object file: No such file or directory\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712256321.2152421, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/rwkv.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/rwkv.D1.data
new file mode 100644
index 000000000..f3e001267
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/rwkv.D1.data
@@ -0,0 +1,478 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "rwkv", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv", "tags": ["llm", "rnn", "unsupported-rocm"], "plan": {"method": "per_gpu"}, "argv": {"--data_type": "dummy", "--ctx_len": 128, "--epoch_steps": 1000, "--epoch_count": 20, "--epoch_begin": 0, "--epoch_save": 0, "--micro_bsz": 16, "--n_layer": 12, "--n_embd": 768, "--pre_ffn": 0, "--head_qk": 0, "--lr_init": "6e-4", "--lr_final": "1e-5", "--warmup_steps": 0, "--beta1": 0.9, "--beta2": 0.99, "--adam_eps": "1e-8", "--accelerator": "gpu", "--devices": 1, "--precision": "tf32", "--strategy": "ddp_find_unused_parameters_false", "--grad_cp": 0, "--random_seed": 1234, "--enable_progress_bar": "False"}, "weight": 1.0, "name": "rwkv", "tag": ["rwkv", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712256285.982062, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712256286.0277565}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "[2024-04-04 18:44:47,845] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", "pipe": "stdout"}
+{"event": "line", "data": "########## work in progress ##########\n", "pipe": "stderr"}
+{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"}
+{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"}
+{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"}
+{"event": "line", "data": "\n", "pipe": "stdout"}
+{"event": "line", "data": "Global seed set to 1234\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "############################################################################\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# RWKV-4 TF32 on 1x1 GPU, bsz 1x1x16=16, ddp_find_unused_parameters_false \n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Data = (dummy), ProjDir = /Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Epoch = 0 to 19 (will continue afterwards), save every 0 epoch\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Each \"epoch\" = 1000 steps, 16000 samples, 2048000 tokens\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Model = 12 n_layer, 768 n_embd, 128 ctx_len\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Adam = lr 0.0006 to 1e-05, warmup 0 steps, beta (0.9, 0.99), eps 1e-08\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "# Found torch 2.1.0+cu118, recommend 1.13.1+cu117 or newer\n", "pipe": "stderr"}
+{"event": "line", "data": "# Found deepspeed 0.12.2, recommend 0.7.0 (faster than newer versions)\n", "pipe": "stderr"}
+{"event": "line", "data": "# Found pytorch_lightning 1.9.5, recommend 1.9.1 or newer\n", "pipe": "stderr"}
+{"event": "line", "data": "#\n", "pipe": "stderr"}
+{"event": "line", "data": "############################################################################\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "{'load_model': '', 'wandb': '', 'proj_dir': '/Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/', 'random_seed': 1234, 'data_file': '', 'data_type': 'dummy', 'vocab_size': 0, 'ctx_len': 128, 'epoch_steps': 1000, 'epoch_count': 20, 'epoch_begin': 0, 'epoch_save': 0, 'micro_bsz': 16, 'n_layer': 12, 'n_embd': 768, 'dim_att': 768, 'dim_ffn': 3072, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 0.0006, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_version': 1, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': False, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'ddp_find_unused_parameters_false', 'sync_batchnorm': False, 'precision': 'tf32', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-04-04-18-44-49', 'betas': (0.9, 0.99), 'real_bsz': 16, 'run_name': '0 ctx128 L12 D768'}\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "Building dummy data...\n", "pipe": "stderr"}
+{"event": "line", "data": "Building token list...\n", "pipe": "stderr"}
+{"event": "line", "data": "Data has 1620950 tokens, 13 vocab size.\n", "pipe": "stderr"}
+{"event": "line", "data": "RWKV_MY_TESTING \n", "pipe": "stdout"}
+{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"}
+{"event": "line", "data": "Creating extension directory /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/wkv_128...\n", "pipe": "stderr"}
+{"event": "line", "data": "Detected CUDA files, patching ldflags\n", "pipe": "stderr"}
+{"event": "line", "data": "Emitting ninja build file /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/wkv_128/build.ninja...\n", "pipe": "stderr"}
+{"event": "line", "data": "Building extension module wkv_128...\n", "pipe": "stderr"}
+{"event": "line", "data": "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "pipe": "stderr"}
+{"event": "line", "data": "[1/3] /usr/bin/nvcc -DTORCH_EXTENSION_NAME=wkv_128 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -res-usage --maxrregcount 60 --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -DTmax=128 -std=c++17 -c /mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/cuda/wkv_cuda.cu -o wkv_cuda.cuda.o \n", "pipe": "stdout"}
+{"event": "line", "data": "ptxas info : 0 bytes gmem\n", "pipe": "stdout"}
+{"event": "line", "data": "ptxas info : Compiling entry function '_Z15kernel_backwardIfEviiiPKT_S2_S2_S2_S2_S2_PS0_S3_S3_S3_' for 'sm_86'\n", "pipe": "stdout"}
+{"event": "line", "data": "ptxas info : Function properties for _Z15kernel_backwardIfEviiiPKT_S2_S2_S2_S2_S2_PS0_S3_S3_S3_\n", "pipe": "stdout"}
+{"event": "line", "data": " 1024 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads\n", "pipe": "stdout"}
+{"event": "line", "data": "ptxas info : Used 48 registers, 448 bytes cmem[0], 16 bytes cmem[2]\n", "pipe": "stdout"}
+{"event": "line", "data": "ptxas info : Compiling entry function '_Z14kernel_forwardIfEviiiPKT_S2_S2_S2_PS0_' for 'sm_86'\n", "pipe": "stdout"}
+{"event": "line", "data": "ptxas info : Function properties for _Z14kernel_forwardIfEviiiPKT_S2_S2_S2_PS0_\n", "pipe": "stdout"}
+{"event": "line", "data": " 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads\n", "pipe": "stdout"}
+{"event": "line", "data": "ptxas info : Used 40 registers, 408 bytes cmem[0]\n", "pipe": "stdout"}
+{"event": "line", "data": "[2/3] c++ -MMD -MF wkv_op.o.d -DTORCH_EXTENSION_NAME=wkv_128 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -c /mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/cuda/wkv_op.cpp -o wkv_op.o \n", "pipe": "stdout"}
+{"event": "line", "data": "[3/3] c++ wkv_op.o wkv_cuda.cuda.o -shared -L/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda -ltorch -ltorch_python -L/usr/lib64 -lcudart -o wkv_128.so\n", "pipe": "stdout"}
+{"event": "line", "data": "Loading extension module wkv_128...\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stdout"}
+{"event": "line", "data": "############################################################################\n", "pipe": "stdout"}
+{"event": "line", "data": "#\n", "pipe": "stdout"}
+{"event": "line", "data": "# Init model weight (slow for large models)...\n", "pipe": "stdout"}
+{"event": "line", "data": "#\n", "pipe": "stdout"}
+{"event": "line", "data": "############################################################################\n", "pipe": "stdout"}
+{"event": "line", "data": "\n", "pipe": "stdout"}
+{"event": "line", "data": "13 768 -0.0006 emb.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.0.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.0.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.0.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.0.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.0.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.0.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.1.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.1.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.1.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.1.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.1.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.1.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.2.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.2.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.2.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.2.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.2.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.2.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.3.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.3.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.3.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.3.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.3.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.3.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.4.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.4.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.4.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.4.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.4.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.4.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.5.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.5.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.5.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.5.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.5.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.5.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.6.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.6.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.6.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.6.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.6.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.6.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.7.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.7.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.7.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.7.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.7.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.7.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.8.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.8.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.8.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.8.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.8.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.8.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.9.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.9.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.9.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.9.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.9.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.9.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.10.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.10.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.10.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.10.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.10.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.10.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.11.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 1.0 blocks.11.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.11.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.11.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 1.0 blocks.11.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 0 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 0 blocks.11.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "13 768 0.5 head.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "GPU available: True (cuda), used: True\n", "pipe": "stderr"}
+{"event": "line", "data": "TPU available: False, using: 0 TPU cores\n", "pipe": "stderr"}
+{"event": "line", "data": "IPU available: False, using: 0 IPUs\n", "pipe": "stderr"}
+{"event": "line", "data": "HPU available: False, using: 0 HPUs\n", "pipe": "stderr"}
+{"event": "line", "data": "13 768 emb.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln0.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ln0.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.0.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.0.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.0.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.0.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.0.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.0.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.0.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.1.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.1.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.1.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.1.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.1.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.1.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.1.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.2.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.2.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.2.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.2.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.2.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.2.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.2.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.3.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.3.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.3.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.3.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.3.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.3.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.3.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.4.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.4.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.4.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.4.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.4.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.4.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.4.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.5.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.5.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.5.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.5.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.5.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.5.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.5.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.6.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.6.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.6.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.6.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.6.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.6.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.6.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.7.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.7.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.7.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.7.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.7.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.7.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.7.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.8.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.8.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.8.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.8.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.8.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.8.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.8.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.9.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.9.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.9.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.9.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.9.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.9.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.9.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.10.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.10.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.10.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.10.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.10.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.10.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.10.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ln1.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ln1.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ln2.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ln2.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.att.time_decay\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.att.time_first\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.att.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.att.time_mix_v\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.att.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.11.att.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.11.att.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.11.att.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.11.att.output.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ffn.time_mix_k\n", "pipe": "stdout"}
+{"event": "line", "data": "768 blocks.11.ffn.time_mix_r\n", "pipe": "stdout"}
+{"event": "line", "data": "3072 768 blocks.11.ffn.key.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 768 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 3072 blocks.11.ffn.value.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 ln_out.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "768 ln_out.bias\n", "pipe": "stdout"}
+{"event": "line", "data": "13 768 head.weight\n", "pipe": "stdout"}
+{"event": "line", "data": "[rank: 0] Global seed set to 1234\n", "pipe": "stderr"}
+{"event": "line", "data": "Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1\n", "pipe": "stderr"}
+{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"}
+{"event": "line", "data": "distributed_backend=nccl\n", "pipe": "stderr"}
+{"event": "line", "data": "All distributed processes registered. Starting with 1 processes\n", "pipe": "stderr"}
+{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]\n", "pipe": "stderr"}
+{"event": "line", "data": "Installed CUDA version 11.5 does not match the version torch was compiled with 11.8 but since the APIs are compatible, accepting this combination\n", "pipe": "stdout"}
+{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"}
+{"event": "line", "data": "Creating extension directory /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/fused_adam...\n", "pipe": "stderr"}
+{"event": "line", "data": "Detected CUDA files, patching ldflags\n", "pipe": "stderr"}
+{"event": "line", "data": "Emitting ninja build file /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/fused_adam/build.ninja...\n", "pipe": "stderr"}
+{"event": "line", "data": "Building extension module fused_adam...\n", "pipe": "stderr"}
+{"event": "line", "data": "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "pipe": "stderr"}
+{"event": "line", "data": "[1/3] /usr/bin/nvcc -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/includes -I/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -lineinfo --use_fast_math -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_86,code=compute_86 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -std=c++17 -c /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o \n", "pipe": "stdout"}
+{"event": "line", "data": "FAILED: multi_tensor_adam.cuda.o \n", "pipe": "stdout"}
+{"event": "line", "data": "/usr/bin/nvcc -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/includes -I/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -lineinfo --use_fast_math -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_86,code=compute_86 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -std=c++17 -c /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o \n", "pipe": "stdout"}
+{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:435:145: error: parameter packs not expanded with \u2018...\u2019:\n", "pipe": "stdout"}
+{"event": "line", "data": " 435 | function(_Functor&& __f)\n", "pipe": "stdout"}
+{"event": "line", "data": " | ^ \n", "pipe": "stdout"}
+{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:435:145: note: \u2018_ArgTypes\u2019\n", "pipe": "stdout"}
+{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:530:146: error: parameter packs not expanded with \u2018...\u2019:\n", "pipe": "stdout"}
+{"event": "line", "data": " 530 | operator=(_Functor&& __f)\n", "pipe": "stdout"}
+{"event": "line", "data": " | ^ \n", "pipe": "stdout"}
+{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:530:146: note: \u2018_ArgTypes\u2019\n", "pipe": "stdout"}
+{"event": "line", "data": "[2/3] c++ -MMD -MF fused_adam_frontend.o.d -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/includes -I/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -std=c++17 -g -Wno-reorder -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DBF16_AVAILABLE -c /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam/fused_adam_frontend.cpp -o fused_adam_frontend.o \n", "pipe": "stdout"}
+{"event": "line", "data": "ninja: build stopped: subcommand failed.\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2869.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 2100, in _run_ninja_build\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3975.8125, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "RuntimeError", "message": "Error building extension 'fused_adam'"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": " subprocess.run(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/subprocess.py\", line 526, in run\n", "pipe": "stderr"}
+{"event": "line", "data": " raise CalledProcessError(retcode, process.args,\n", "pipe": "stderr"}
+{"event": "line", "data": "subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "The above exception was the direct cause of the following exception:\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py\", line 420, in \n", "pipe": "stderr"}
+{"event": "line", "data": " trainer.fit(model, data_loader)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 608, in fit\n", "pipe": "stderr"}
+{"event": "line", "data": " call._call_and_handle_interrupt(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py\", line 36, in _call_and_handle_interrupt\n", "pipe": "stderr"}
+{"event": "line", "data": " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py\", line 88, in launch\n", "pipe": "stderr"}
+{"event": "line", "data": " return function(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 650, in _fit_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(model, ckpt_path=self.ckpt_path)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1093, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " self.strategy.setup(self)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/ddp.py\", line 181, in setup\n", "pipe": "stderr"}
+{"event": "line", "data": " self.setup_optimizers(trainer)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py\", line 142, in setup_optimizers\n", "pipe": "stderr"}
+{"event": "line", "data": " self.optimizers, self.lr_scheduler_configs, self.optimizer_frequencies = _init_optimizers_and_lr_schedulers(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/core/optimizer.py\", line 180, in _init_optimizers_and_lr_schedulers\n", "pipe": "stderr"}
+{"event": "line", "data": " optim_conf = model.trainer._call_lightning_module_hook(\"configure_optimizers\", pl_module=model)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1356, in _call_lightning_module_hook\n", "pipe": "stderr"}
+{"event": "line", "data": " output = fn(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/src/model.py\", line 606, in configure_optimizers\n", "pipe": "stderr"}
+{"event": "line", "data": " return FusedAdam(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py\", line 94, in __init__\n", "pipe": "stderr"}
+{"event": "line", "data": " fused_adam_cuda = FusedAdamBuilder().load()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 452, in load\n", "pipe": "stderr"}
+{"event": "line", "data": " return self.jit_load(verbose)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 501, in jit_load\n", "pipe": "stderr"}
+{"event": "line", "data": " op_module = load(name=self.name,\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1308, in load\n", "pipe": "stderr"}
+{"event": "line", "data": " return _jit_compile(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1710, in _jit_compile\n", "pipe": "stderr"}
+{"event": "line", "data": " _write_ninja_file_and_build_library(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1823, in _write_ninja_file_and_build_library\n", "pipe": "stderr"}
+{"event": "line", "data": " _run_ninja_build(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 2116, in _run_ninja_build\n", "pipe": "stderr"}
+{"event": "line", "data": " raise RuntimeError(message) from e\n", "pipe": "stderr"}
+{"event": "line", "data": "RuntimeError: Error building extension 'fused_adam'\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712256320.832648, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/stargan.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/stargan.D0.data
new file mode 100644
index 000000000..3f64fecb5
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/stargan.D0.data
@@ -0,0 +1,169 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "stargan", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "tags": ["gan", "resnet", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan", "plan": {"method": "per_gpu"}, "argv": {"--image_size": 512, "--c_dim": 5, "--batch_size": 16}, "weight": 1.0, "name": "stargan", "tag": ["stargan", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712256054.185817, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712256056.526606}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Namespace(c_dim=5, c2_dim=8, celeba_crop_size=178, rafd_crop_size=256, image_size=512, g_conv_dim=64, d_conv_dim=64, g_repeat_num=6, d_repeat_num=6, lambda_cls=1, lambda_rec=10, lambda_gp=10, dataset='synth', batch_size=16, num_iters=200000, num_iters_decay=100000, g_lr=0.0001, d_lr=0.0001, n_critic=5, beta1=0.5, beta2=0.999, resume_iters=None, selected_attrs=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'], test_iters=200000, num_workers=1, mode='train', use_tensorboard=False, celeba_image_dir='data/celeba/images', attr_path='data/celeba/list_attr_celeba.txt', rafd_image_dir='data/RaFD/train', log_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/logs', model_save_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/models', sample_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/samples', result_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/results', log_step=10, sample_step=1000, model_save_step=10000, lr_update_step=1000)\n", "pipe": "stdout"}
+{"event": "line", "data": "Generator(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(8, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (5): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (6): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (7): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (8): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (9): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (10): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (11): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (12): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (13): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (14): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (15): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (16): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (17): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (18): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (19): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (20): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (21): Conv2d(64, 3, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (22): Tanh()\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": ")\n", "pipe": "stdout"}
+{"event": "line", "data": "G\n", "pipe": "stdout"}
+{"event": "line", "data": "The number of parameters: 8430528\n", "pipe": "stdout"}
+{"event": "line", "data": "Discriminator(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (5): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (6): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (7): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (8): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (9): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (10): Conv2d(1024, 2048, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (11): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (conv1): Conv2d(2048, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (conv2): Conv2d(2048, 5, kernel_size=(8, 8), stride=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": ")\n", "pipe": "stdout"}
+{"event": "line", "data": "D\n", "pipe": "stdout"}
+{"event": "line", "data": "The number of parameters: 45376448\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"}
+{"event": "line", "data": "Start training...\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(warning.format(ret))\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 13.11575698852539}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [16217.8125, 24512.0], "load": 0.65, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [20421.8125, 24512.0], "load": 0.94, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [20161.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24403.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 108.19 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 222, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main(config)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 77, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " solver.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/solver.py\", line 282, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " x_fake = self.G(x_real, c_trg)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 94, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self.main(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 20, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return x + self.main(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 108.19 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712256068.2847652, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/stargan.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/stargan.D1.data
new file mode 100644
index 000000000..db92b9066
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/stargan.D1.data
@@ -0,0 +1,169 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "stargan", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "tags": ["gan", "resnet", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan", "plan": {"method": "per_gpu"}, "argv": {"--image_size": 512, "--c_dim": 5, "--batch_size": 16}, "weight": 1.0, "name": "stargan", "tag": ["stargan", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712256056.507142, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712256056.535834}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Namespace(c_dim=5, c2_dim=8, celeba_crop_size=178, rafd_crop_size=256, image_size=512, g_conv_dim=64, d_conv_dim=64, g_repeat_num=6, d_repeat_num=6, lambda_cls=1, lambda_rec=10, lambda_gp=10, dataset='synth', batch_size=16, num_iters=200000, num_iters_decay=100000, g_lr=0.0001, d_lr=0.0001, n_critic=5, beta1=0.5, beta2=0.999, resume_iters=None, selected_attrs=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'], test_iters=200000, num_workers=1, mode='train', use_tensorboard=False, celeba_image_dir='data/celeba/images', attr_path='data/celeba/list_attr_celeba.txt', rafd_image_dir='data/RaFD/train', log_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/logs', model_save_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/models', sample_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/samples', result_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/results', log_step=10, sample_step=1000, model_save_step=10000, lr_update_step=1000)\n", "pipe": "stdout"}
+{"event": "line", "data": "Generator(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(8, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (5): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (6): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (7): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (8): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (9): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (10): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (11): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (12): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (13): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (14): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (15): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (16): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (17): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (18): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (19): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (20): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (21): Conv2d(64, 3, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (22): Tanh()\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": ")\n", "pipe": "stdout"}
+{"event": "line", "data": "G\n", "pipe": "stdout"}
+{"event": "line", "data": "The number of parameters: 8430528\n", "pipe": "stdout"}
+{"event": "line", "data": "Discriminator(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (5): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (6): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (7): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (8): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (9): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (10): Conv2d(1024, 2048, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (11): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (conv1): Conv2d(2048, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (conv2): Conv2d(2048, 5, kernel_size=(8, 8), stride=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": ")\n", "pipe": "stdout"}
+{"event": "line", "data": "D\n", "pipe": "stdout"}
+{"event": "line", "data": "The number of parameters: 45376448\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"}
+{"event": "line", "data": "Start training...\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(warning.format(ret))\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 13.079126358032227}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [16217.8125, 24512.0], "load": 0.65, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [20421.8125, 24512.0], "load": 0.92, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [20161.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24403.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 108.19 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 222, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main(config)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 77, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " solver.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/solver.py\", line 282, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " x_fake = self.G(x_real, c_trg)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 94, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self.main(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 20, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return x + self.main(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 108.19 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712256068.359305, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/super-slomo.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/super-slomo.D0.data
new file mode 100644
index 000000000..6b2a1b003
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/super-slomo.D0.data
@@ -0,0 +1,57 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "super-slomo", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "tags": ["convnet", "unet", "video-interpolation", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo", "plan": {"method": "per_gpu"}, "argv": {"--train_batch_size": 32}, "weight": 1.0, "name": "super-slomo", "tag": ["super-slomo", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712256070.689504, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712256073.038403}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(msg)\n", "pipe": "stderr"}
+{"event": "line", "data": "Epoch: 0\n", "pipe": "stdout"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:136: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py:4296: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24457.8125, 24512.0], "load": 0.11, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 54.19 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 274, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 219, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " intrpOut = ArbTimeFlowIntrp(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 209, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.up5(x, s1)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 139, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope=0.1)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 54.19 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712256077.6808748, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/super-slomo.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/super-slomo.D1.data
new file mode 100644
index 000000000..1dfd9d19c
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/super-slomo.D1.data
@@ -0,0 +1,57 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "super-slomo", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "tags": ["convnet", "unet", "video-interpolation", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo", "plan": {"method": "per_gpu"}, "argv": {"--train_batch_size": 32}, "weight": 1.0, "name": "super-slomo", "tag": ["super-slomo", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712256073.022484, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712256073.0449874}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(msg)\n", "pipe": "stderr"}
+{"event": "line", "data": "Epoch: 0\n", "pipe": "stdout"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:136: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py:4296: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24457.8125, 24512.0], "load": 0.08, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 54.19 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 274, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 219, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " intrpOut = ArbTimeFlowIntrp(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 209, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.up5(x, s1)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 139, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope=0.1)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 54.19 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712256077.391992, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/t5.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/t5.D0.data
new file mode 100644
index 000000000..a66f634cb
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/t5.D0.data
@@ -0,0 +1,64 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "T5", "--batch-size": 16}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 2.0, "name": "t5", "tag": ["t5", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255843.55336, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712255845.9189045}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24097.8125, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 414.19 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1746, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " decoder_outputs = self.decoder(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1113, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " layer_outputs = layer_module(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 694, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " self_attention_outputs = self.layer[0](\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 601, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " attention_output = self.SelfAttention(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 561, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " attn_weights = nn.functional.softmax(scores.float(), dim=-1).type_as(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py\", line 1856, in softmax\n", "pipe": "stderr"}
+{"event": "line", "data": " ret = input.softmax(dim)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 414.19 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712255849.5068624, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/t5.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/t5.D1.data
new file mode 100644
index 000000000..c521dc7f4
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/t5.D1.data
@@ -0,0 +1,64 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "T5", "--batch-size": 16}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 2.0, "name": "t5", "tag": ["t5", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255845.903405, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712255845.926259}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24097.8125, 24512.0], "load": 0.02, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 414.19 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1746, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " decoder_outputs = self.decoder(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1113, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " layer_outputs = layer_module(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 694, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " self_attention_outputs = self.layer[0](\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 601, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " attention_output = self.SelfAttention(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 561, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " attn_weights = nn.functional.softmax(scores.float(), dim=-1).type_as(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py\", line 1856, in softmax\n", "pipe": "stderr"}
+{"event": "line", "data": " ret = input.softmax(dim)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 414.19 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712255849.632406, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/tf32.D0.data
new file mode 100644
index 000000000..0df40f92d
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/tf32.D0.data
@@ -0,0 +1,182 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32", "--tf32": true}, "weight": 0.0, "name": "tf32", "tag": ["tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255494.506738, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712255496.8402894}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 44.32395615769511, "units": "Tflops", "t": 1712255498.6289868}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2409.75, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255497.979648}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.04, "temperature": null, "power": null}}, "t": 1712255498.4867253}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.113888221337106, "units": "Tflops", "t": 1712255499.1171482}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.24, "temperature": null, "power": null}}, "t": 1712255498.993217}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.937108279575895, "units": "Tflops", "t": 1712255499.5959725}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.44, "temperature": null, "power": null}}, "t": 1712255499.4994547}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.68278843097592, "units": "Tflops", "t": 1712255500.0882452}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.57, "temperature": null, "power": null}}, "t": 1712255500.0061934}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.74688227388384, "units": "Tflops", "t": 1712255500.569585}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.77, "temperature": null, "power": null}}, "t": 1712255500.5126212}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.99754622200159, "units": "Tflops", "t": 1712255501.0584092}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.9, "temperature": null, "power": null}}, "t": 1712255501.018457}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.407794765119164, "units": "Tflops", "t": 1712255501.542985}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255501.524528}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.636620572837444, "units": "Tflops", "t": 1712255502.0357597}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255502.030958}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.01992714931719, "units": "Tflops", "t": 1712255502.524601}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.25808796513544, "units": "Tflops", "t": 1712255503.010978}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255502.5372303}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.565022200404776, "units": "Tflops", "t": 1712255503.4940054}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255503.0434697}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.24254794523227, "units": "Tflops", "t": 1712255503.9803195}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255503.5498009}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.44430716045276, "units": "Tflops", "t": 1712255504.464331}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255504.0562923}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.35295764222703, "units": "Tflops", "t": 1712255504.9493244}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255504.562552}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.330489517371284, "units": "Tflops", "t": 1712255505.4345589}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255505.0689962}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.91867693633559, "units": "Tflops", "t": 1712255505.9245315}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255505.5752397}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.86405391084061, "units": "Tflops", "t": 1712255506.4150202}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255506.0814886}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.80390417439531, "units": "Tflops", "t": 1712255506.9060657}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255506.5879545}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.27003883777447, "units": "Tflops", "t": 1712255507.3921373}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255507.0944297}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.03315971237542, "units": "Tflops", "t": 1712255507.880683}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255507.6007733}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.249362039925444, "units": "Tflops", "t": 1712255508.367085}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255508.107333}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.86756763115208, "units": "Tflops", "t": 1712255508.857536}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255508.6136293}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.413256491762844, "units": "Tflops", "t": 1712255509.3530936}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255509.1199245}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.629730615567254, "units": "Tflops", "t": 1712255509.846111}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255509.626175}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.57582919323457, "units": "Tflops", "t": 1712255510.339934}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255510.132472}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.29943193500668, "units": "Tflops", "t": 1712255510.8256989}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255510.6393943}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.07235346507425, "units": "Tflops", "t": 1712255511.3139145}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255511.1457307}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.30152336671966, "units": "Tflops", "t": 1712255511.7998052}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255511.652009}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.618892418552036, "units": "Tflops", "t": 1712255512.2929325}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255512.1583033}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.14842643719602, "units": "Tflops", "t": 1712255512.780409}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255512.664624}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.42014394596967, "units": "Tflops", "t": 1712255513.275786}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255513.1707602}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.30301418593073, "units": "Tflops", "t": 1712255513.7614896}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255513.6771686}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.010150601289375, "units": "Tflops", "t": 1712255514.2503874}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255514.18358}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.86466495310037, "units": "Tflops", "t": 1712255514.7408116}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255514.6898775}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.06675961840541, "units": "Tflops", "t": 1712255515.2290866}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255515.195948}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.08557279222453, "units": "Tflops", "t": 1712255515.7170584}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255515.7021916}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.032016393349885, "units": "Tflops", "t": 1712255516.2057006}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.19838508142936, "units": "Tflops", "t": 1712255516.7034588}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255516.2083428}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.617662119399924, "units": "Tflops", "t": 1712255517.1966574}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255516.7146041}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.09009117312506, "units": "Tflops", "t": 1712255517.6846452}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255517.221214}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.01836701084231, "units": "Tflops", "t": 1712255518.1736708}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255517.7274458}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.04655405981273, "units": "Tflops", "t": 1712255518.6621296}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255518.233823}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.473583510357905, "units": "Tflops", "t": 1712255519.1568947}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255518.740066}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.045212066603355, "units": "Tflops", "t": 1712255519.6453083}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255519.2467453}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.56512482058357, "units": "Tflops", "t": 1712255520.1390796}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255519.7528946}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.06995278598463, "units": "Tflops", "t": 1712255520.6272743}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255520.2602038}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.89405827208788, "units": "Tflops", "t": 1712255521.117369}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255520.7664056}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.06413935682868, "units": "Tflops", "t": 1712255521.605628}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255521.2727547}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.519973263292826, "units": "Tflops", "t": 1712255522.1000543}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255521.7790055}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.12750727845468, "units": "Tflops", "t": 1712255522.587689}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255522.2851374}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.28907231439495, "units": "Tflops", "t": 1712255523.0846434}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255522.7918847}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.818839367919864, "units": "Tflops", "t": 1712255523.5755682}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255523.2981963}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.36585096145274, "units": "Tflops", "t": 1712255524.071713}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255523.8043463}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.16850230977778, "units": "Tflops", "t": 1712255524.5588903}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255524.3109093}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.337763102041365, "units": "Tflops", "t": 1712255525.0551436}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255524.817204}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.86988131271229, "units": "Tflops", "t": 1712255525.5457551}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255525.3235364}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.464750159714605, "units": "Tflops", "t": 1712255526.0405846}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255525.8301597}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.83888471166438, "units": "Tflops", "t": 1712255526.531331}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255526.3365083}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.69336405367286, "units": "Tflops", "t": 1712255527.0348437}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255526.842745}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.190942954087575, "units": "Tflops", "t": 1712255527.5217907}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255527.3494315}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.16894352155789, "units": "Tflops", "t": 1712255528.0199347}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255527.8556828}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.07821307677197, "units": "Tflops", "t": 1712255528.5080955}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255528.362025}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.64601939820956, "units": "Tflops", "t": 1712255529.0011225}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255528.8683746}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.05436560733881, "units": "Tflops", "t": 1712255529.4894922}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255529.3747003}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.669111932001734, "units": "Tflops", "t": 1712255529.9822176}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255529.8810327}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.59244515057877, "units": "Tflops", "t": 1712255530.475687}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255530.387586}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.90467902682812, "units": "Tflops", "t": 1712255530.97685}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255530.8938532}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.91653320906153, "units": "Tflops", "t": 1712255531.466774}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255531.4004002}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.75349336349786, "units": "Tflops", "t": 1712255531.9696054}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255531.9067729}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.08711160933586, "units": "Tflops", "t": 1712255532.4688323}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255532.4130595}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.610972206472205, "units": "Tflops", "t": 1712255532.9620824}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255532.919665}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.09779499797335, "units": "Tflops", "t": 1712255533.472614}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255533.4260178}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.15018997197237, "units": "Tflops", "t": 1712255533.9710188}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255533.932297}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.62970902029946, "units": "Tflops", "t": 1712255534.4642384}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255534.438724}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.868657300603886, "units": "Tflops", "t": 1712255534.9657967}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255534.9454157}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.17471867761264, "units": "Tflops", "t": 1712255535.4639325}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255535.4516432}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.68978139498773, "units": "Tflops", "t": 1712255535.9564278}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.41669995186631, "units": "Tflops", "t": 1712255536.4517384}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255535.958055}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.45832031573522, "units": "Tflops", "t": 1712255536.9469166}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255536.4645443}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.624418809402805, "units": "Tflops", "t": 1712255537.4399807}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255536.970678}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.51293723763094, "units": "Tflops", "t": 1712255537.9458098}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255537.4771962}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.843353780268046, "units": "Tflops", "t": 1712255538.4365218}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255537.9837294}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.19410918894905, "units": "Tflops", "t": 1712255538.9460611}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255538.490056}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.731806881886584, "units": "Tflops", "t": 1712255539.4379442}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255538.9964588}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.571047127787935, "units": "Tflops", "t": 1712255539.931694}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255539.5027804}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.242059717574854, "units": "Tflops", "t": 1712255540.4180152}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255540.0090275}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.04321742162876, "units": "Tflops", "t": 1712255540.9174335}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255540.515736}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.64651645777013, "units": "Tflops", "t": 1712255541.4100928}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255541.0220466}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.7110063924343, "units": "Tflops", "t": 1712255541.913332}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255541.5282109}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.584642082157416, "units": "Tflops", "t": 1712255542.4069076}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255542.0344613}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712255542.918109, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/tf32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/tf32.D1.data
new file mode 100644
index 000000000..28301f97e
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/tf32.D1.data
@@ -0,0 +1,182 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32", "--tf32": true}, "weight": 0.0, "name": "tf32", "tag": ["tf32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255496.831005, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712255496.8407166}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 44.7788458799578, "units": "Tflops", "t": 1712255498.6116586}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255497.9763858}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.11, "temperature": null, "power": null}}, "t": 1712255498.483068}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.95895885313246, "units": "Tflops", "t": 1712255499.1125872}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.24, "temperature": null, "power": null}}, "t": 1712255498.9896023}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.83969049569168, "units": "Tflops", "t": 1712255499.59244}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.44, "temperature": null, "power": null}}, "t": 1712255499.495858}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.45832031573522, "units": "Tflops", "t": 1712255500.0872195}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.57, "temperature": null, "power": null}}, "t": 1712255500.0023372}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.36973408817683, "units": "Tflops", "t": 1712255500.5730667}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.77, "temperature": null, "power": null}}, "t": 1712255500.5084572}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.85624276570781, "units": "Tflops", "t": 1712255501.0634317}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.9, "temperature": null, "power": null}}, "t": 1712255501.0147457}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.62530402262176, "units": "Tflops", "t": 1712255501.5563128}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255501.5209641}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.39257623782317, "units": "Tflops", "t": 1712255502.0408685}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255502.0272985}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.17425419621564, "units": "Tflops", "t": 1712255502.52782}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.77988941592725, "units": "Tflops", "t": 1712255503.0194016}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255502.5336595}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.28235172434591, "units": "Tflops", "t": 1712255503.5053566}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255503.0398607}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.61397162899378, "units": "Tflops", "t": 1712255503.9983792}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255503.5462012}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.14422779640043, "units": "Tflops", "t": 1712255504.4857323}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255504.0526247}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.84612286969951, "units": "Tflops", "t": 1712255504.976292}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255504.5589423}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.395056094837905, "units": "Tflops", "t": 1712255505.4608185}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255505.0652406}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.821953935089155, "units": "Tflops", "t": 1712255505.95168}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255505.5715632}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.25426857104126, "units": "Tflops", "t": 1712255506.4377108}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255506.0778887}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.614813266045914, "units": "Tflops", "t": 1712255506.9307034}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255506.5843349}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.27059432918133, "units": "Tflops", "t": 1712255507.4166696}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255507.0908298}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.178989540404615, "units": "Tflops", "t": 1712255507.903519}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255507.5970776}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.893533834062595, "units": "Tflops", "t": 1712255508.3936152}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255508.1036634}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.943760616541226, "units": "Tflops", "t": 1712255508.883008}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255508.6099136}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.371006213136134, "units": "Tflops", "t": 1712255509.3677788}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255509.1162822}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.851095980508966, "units": "Tflops", "t": 1712255509.869351}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255509.622495}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.38022567106827, "units": "Tflops", "t": 1712255510.3541443}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255510.1286962}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.92675056188943, "units": "Tflops", "t": 1712255510.8437297}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255510.6357908}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.68997627672255, "units": "Tflops", "t": 1712255511.3360486}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255511.142092}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.72117926129767, "units": "Tflops", "t": 1712255511.827869}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255511.6484013}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.222140464981514, "units": "Tflops", "t": 1712255512.325248}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255512.154714}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.50081315456582, "units": "Tflops", "t": 1712255512.8196175}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255512.6609316}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.155368334437334, "units": "Tflops", "t": 1712255513.317753}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255513.1671534}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.39386759221849, "units": "Tflops", "t": 1712255513.8132057}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255513.6736007}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.129869928928215, "units": "Tflops", "t": 1712255514.3007226}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255514.1799622}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.83853594378456, "units": "Tflops", "t": 1712255514.7912612}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255514.6861465}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.89980604170736, "units": "Tflops", "t": 1712255515.2811234}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255515.1923363}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.86259184156734, "units": "Tflops", "t": 1712255515.7715054}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255515.698581}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.46614353772299, "units": "Tflops", "t": 1712255516.2661517}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255516.2046137}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.42164150130122, "units": "Tflops", "t": 1712255516.761284}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255516.711006}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.01116101396581, "units": "Tflops", "t": 1712255517.2499323}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255517.217488}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.77719371413746, "units": "Tflops", "t": 1712255517.7411423}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255517.7238474}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.41163119298791, "units": "Tflops", "t": 1712255518.236544}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255518.2301853}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.06380909337978, "units": "Tflops", "t": 1712255518.7246342}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.97790713095039, "units": "Tflops", "t": 1712255519.2135952}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255518.736472}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.40323484066572, "units": "Tflops", "t": 1712255519.6980278}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255519.2429507}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.11737497642122, "units": "Tflops", "t": 1712255520.1856508}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255519.7492359}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.266417367973084, "units": "Tflops", "t": 1712255520.6715488}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255520.2565103}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.25171526596159, "units": "Tflops", "t": 1712255521.1576216}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255520.7628348}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.00887666735363, "units": "Tflops", "t": 1712255521.6463003}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255521.2690878}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.609828647973714, "units": "Tflops", "t": 1712255522.1393409}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255521.775312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.286798455385124, "units": "Tflops", "t": 1712255522.6250246}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255522.2815325}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.85506478435751, "units": "Tflops", "t": 1712255523.11552}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255522.788174}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.02370708803006, "units": "Tflops", "t": 1712255523.6040359}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255523.2944717}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.623296150469585, "units": "Tflops", "t": 1712255524.0969372}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255523.8007686}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.75496655209367, "units": "Tflops", "t": 1712255524.588385}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255524.3072574}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.62869406628652, "units": "Tflops", "t": 1712255525.081225}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255524.8135872}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.252114894693726, "units": "Tflops", "t": 1712255525.5672698}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255525.319915}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.85421405852636, "units": "Tflops", "t": 1712255526.0576234}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255525.8265164}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.01922397492914, "units": "Tflops", "t": 1712255526.5461853}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255526.332841}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.86999045452822, "units": "Tflops", "t": 1712255527.0363715}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255526.839161}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.27126093686466, "units": "Tflops", "t": 1712255527.5222168}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255527.3457804}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.41069028490827, "units": "Tflops", "t": 1712255528.017479}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255527.8520536}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.296628829430546, "units": "Tflops", "t": 1712255528.5031672}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255528.3583648}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.89211354260454, "units": "Tflops", "t": 1712255528.9931178}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255528.8647535}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.440142856779296, "units": "Tflops", "t": 1712255529.4771535}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255529.371083}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.694199131712345, "units": "Tflops", "t": 1712255529.969269}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255529.8774142}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.59266074372305, "units": "Tflops", "t": 1712255530.4625041}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255530.3838887}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.54872286506226, "units": "Tflops", "t": 1712255530.9562283}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255530.890147}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.84839072519775, "units": "Tflops", "t": 1712255531.446665}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255531.396813}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.93454250196225, "units": "Tflops", "t": 1712255531.9363031}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255531.9031782}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.05168076998865, "units": "Tflops", "t": 1712255532.424522}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255532.4094706}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.2418599911157, "units": "Tflops", "t": 1712255532.9106786}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.613043706998376, "units": "Tflops", "t": 1712255533.4036427}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255532.9159985}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.137136077151276, "units": "Tflops", "t": 1712255533.8909335}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255533.4224138}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.946980184043596, "units": "Tflops", "t": 1712255534.380283}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255533.9286559}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.7134394599446, "units": "Tflops", "t": 1712255534.8721898}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255534.4351397}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.97538491512793, "units": "Tflops", "t": 1712255535.3613765}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255534.9416437}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.087137592370524, "units": "Tflops", "t": 1712255535.8492131}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255535.4480195}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.939599954076904, "units": "Tflops", "t": 1712255536.338647}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255535.9544706}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.86370475130797, "units": "Tflops", "t": 1712255536.828903}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255536.4607756}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.77675895392144, "units": "Tflops", "t": 1712255537.3201087}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255536.9670353}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.10996075036498, "units": "Tflops", "t": 1712255537.8076937}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255537.473502}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.57800515724327, "units": "Tflops", "t": 1712255538.3010864}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255537.9800189}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.842525300290234, "units": "Tflops", "t": 1712255538.7915692}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255538.4864187}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.46104205602345, "units": "Tflops", "t": 1712255539.2862768}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255538.9928007}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.08696409638407, "units": "Tflops", "t": 1712255539.7851734}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255539.4991784}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.93104017205305, "units": "Tflops", "t": 1712255540.2748642}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255540.0054255}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.872413539580684, "units": "Tflops", "t": 1712255540.7650237}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255540.5118806}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.81633496006767, "units": "Tflops", "t": 1712255541.2558048}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255541.0182118}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.61518014341669, "units": "Tflops", "t": 1712255541.7487934}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255541.5245922}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.05916390045652, "units": "Tflops", "t": 1712255542.236922}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255542.0308287}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712255543.0094414, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/whisper.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/whisper.D0.data
new file mode 100644
index 000000000..7395c700d
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/whisper.D0.data
@@ -0,0 +1,59 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Whisper", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["audio", "huggingface"], "weight": 1.0, "name": "whisper", "tag": ["whisper", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255861.040341, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712255863.3807697}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2691.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2691.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2691.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23123.8125, 24512.0], "load": 0.05, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.36 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 2393, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " encoder_outputs = self.encoder(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 1159, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " layer_outputs = encoder_layer(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 722, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " hidden_states, attn_weights, _ = self.self_attn(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 413, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.36 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712255876.4242365, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/whisper.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/whisper.D1.data
new file mode 100644
index 000000000..a8165dbcf
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/whisper.D1.data
@@ -0,0 +1,59 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Whisper", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["audio", "huggingface"], "weight": 1.0, "name": "whisper", "tag": ["whisper", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0.12, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0.14, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255863.363996, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712255863.3879108}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2691.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2691.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2691.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23123.8125, 24512.0], "load": 0.05, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.36 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 2393, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " encoder_outputs = self.encoder(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 1159, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " layer_outputs = encoder_layer(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 722, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " hidden_states, attn_weights, _ = self.self_attn(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 413, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.36 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712255876.1995459, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp16.D0.data
new file mode 100644
index 000000000..a329488fe
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp16.D0.data
@@ -0,0 +1,38 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp16", "tag": ["bert-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623418.981402, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623421.377318}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23980.375, 24512.0], "load": 0.19, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623426.0404646, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp16.D1.data
new file mode 100644
index 000000000..75e304153
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp16.D1.data
@@ -0,0 +1,38 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp16", "tag": ["bert-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623421.360096, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623421.3852859}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23980.375, 24512.0], "load": 0.16, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623425.8826509, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp32.D0.data
new file mode 100644
index 000000000..dbf34133c
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp32.D0.data
@@ -0,0 +1,56 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp32", "tag": ["bert-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623410.199405, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623412.5927112}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23304.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623416.502414, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp32.D1.data
new file mode 100644
index 000000000..41842a761
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp32.D1.data
@@ -0,0 +1,56 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp32", "tag": ["bert-fp32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623412.575147, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623412.6011856}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23304.375, 24512.0], "load": 0.02, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623416.5814161, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32-fp16.D0.data
new file mode 100644
index 000000000..abfb3db52
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32-fp16.D0.data
@@ -0,0 +1,38 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 3.0, "name": "bert-tf32-fp16", "tag": ["bert-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623437.305882, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623439.7101252}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23980.375, 24512.0], "load": 0.16, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623443.823012, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32-fp16.D1.data
new file mode 100644
index 000000000..cbe1e673f
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32-fp16.D1.data
@@ -0,0 +1,38 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 3.0, "name": "bert-tf32-fp16", "tag": ["bert-tf32-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623439.693056, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623439.718452}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23980.375, 24512.0], "load": 0.19, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623444.1434972, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32.D0.data
new file mode 100644
index 000000000..a6ed56507
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32.D0.data
@@ -0,0 +1,56 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-tf32", "tag": ["bert-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623428.442214, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623430.9410863}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23304.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623434.8314877, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32.D1.data
new file mode 100644
index 000000000..ea0791402
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32.D1.data
@@ -0,0 +1,56 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-tf32", "tag": ["bert-tf32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623430.922227, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623430.9480348}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23304.375, 24512.0], "load": 0.02, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623434.917985, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bf16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bf16.D0.data
new file mode 100644
index 000000000..eb377ccba
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bf16.D0.data
@@ -0,0 +1,138 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "bf16"}, "weight": 0.0, "name": "bf16", "tag": ["bf16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623063.236003, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712623065.6052935}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 82.18137656811277, "units": "Tflops", "t": 1712623067.2041838}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712623066.809077}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.78082259896284, "units": "Tflops", "t": 1712623067.439379}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.05, "temperature": null, "power": null}}, "t": 1712623067.3150377}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.11527290445359, "units": "Tflops", "t": 1712623067.6731403}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 88.5973314946801, "units": "Tflops", "t": 1712623067.921405}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.25, "temperature": null, "power": null}}, "t": 1712623067.8205304}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.04916179914048, "units": "Tflops", "t": 1712623068.1604064}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.47516402192292, "units": "Tflops", "t": 1712623068.3957233}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.38, "temperature": null, "power": null}}, "t": 1712623068.326009}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.47980618499372, "units": "Tflops", "t": 1712623068.6310735}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 89.07812769495402, "units": "Tflops", "t": 1712623068.8780403}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.58, "temperature": null, "power": null}}, "t": 1712623068.8315809}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.67696615309998, "units": "Tflops", "t": 1712623069.1189268}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.21951675826642, "units": "Tflops", "t": 1712623069.3548973}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.78, "temperature": null, "power": null}}, "t": 1712623069.3371701}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.34832463942529, "units": "Tflops", "t": 1712623069.5931332}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.9526522614453, "units": "Tflops", "t": 1712623069.8349648}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.2184589207597, "units": "Tflops", "t": 1712623070.076089}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.91, "temperature": null, "power": null}}, "t": 1712623069.8425956}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.99875554688103, "units": "Tflops", "t": 1712623070.3152518}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.29094075181388, "units": "Tflops", "t": 1712623070.5535767}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712623070.348327}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.16643491499788, "units": "Tflops", "t": 1712623070.7948887}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.25762877121089, "units": "Tflops", "t": 1712623071.03603}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712623070.8537886}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.20983802050046, "units": "Tflops", "t": 1712623071.2751284}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.32281188201391, "units": "Tflops", "t": 1712623071.5133786}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712623071.3592489}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.00542021194711, "units": "Tflops", "t": 1712623071.7552521}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.02895218636127, "units": "Tflops", "t": 1712623071.996881}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712623071.8649027}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.34249983335145, "units": "Tflops", "t": 1712623072.2351222}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.34157532853149, "units": "Tflops", "t": 1712623072.4733238}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623072.3703656}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.2754197367318, "units": "Tflops", "t": 1712623072.7143567}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.01215522116922, "units": "Tflops", "t": 1712623072.956148}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623072.8758824}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.64271648990436, "units": "Tflops", "t": 1712623073.1962337}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.33501187657636, "units": "Tflops", "t": 1712623073.4345567}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623073.381343}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.38421272703722, "units": "Tflops", "t": 1712623073.6753113}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.33733575544383, "units": "Tflops", "t": 1712623073.9161294}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623073.8868787}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.35760097519562, "units": "Tflops", "t": 1712623074.1570761}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.16551788531262, "units": "Tflops", "t": 1712623074.395735}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623074.392352}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37706042704421, "units": "Tflops", "t": 1712623074.6364944}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.36194469229389, "units": "Tflops", "t": 1712623074.8773472}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.38004796050076, "units": "Tflops", "t": 1712623075.118053}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623074.8978448}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.00618906299496, "units": "Tflops", "t": 1712623075.3571844}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.48296546967502, "units": "Tflops", "t": 1712623075.6002727}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623075.4033408}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.36656034431981, "units": "Tflops", "t": 1712623075.8412232}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37832784162232, "units": "Tflops", "t": 1712623076.0819278}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623075.9087265}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.9904057453513, "units": "Tflops", "t": 1712623076.3210876}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.02571806411885, "units": "Tflops", "t": 1712623076.5627272}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623076.414348}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.3660173022285, "units": "Tflops", "t": 1712623076.803643}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37561199629059, "units": "Tflops", "t": 1712623077.0443556}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623076.9198291}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.46196431366901, "units": "Tflops", "t": 1712623077.2848835}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.34511508896233, "units": "Tflops", "t": 1712623077.5256891}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623077.425265}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.4496312303785, "units": "Tflops", "t": 1712623077.7664015}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37597409967411, "units": "Tflops", "t": 1712623078.0071177}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623077.9306684}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37009027526594, "units": "Tflops", "t": 1712623078.2479057}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37262475287666, "units": "Tflops", "t": 1712623078.488631}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623078.436275}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.36837053124633, "units": "Tflops", "t": 1712623078.7294173}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.36610780879543, "units": "Tflops", "t": 1712623078.9702625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623078.941826}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37135749649583, "units": "Tflops", "t": 1712623079.2110434}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.38059116939033, "units": "Tflops", "t": 1712623079.4517431}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623079.4473877}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.38041009904289, "units": "Tflops", "t": 1712623079.6924856}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.9513069406841, "units": "Tflops", "t": 1712623079.9343135}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.35389104390393, "units": "Tflops", "t": 1712623080.1750827}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623079.9528053}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.36122071076, "units": "Tflops", "t": 1712623080.4160469}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.26205328719895, "units": "Tflops", "t": 1712623080.6570585}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623080.45829}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.71122319838605, "units": "Tflops", "t": 1712623080.8995764}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37199112029228, "units": "Tflops", "t": 1712623081.1402974}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623080.9637456}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37407308892712, "units": "Tflops", "t": 1712623081.3810673}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.96879921545683, "units": "Tflops", "t": 1712623081.6228526}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623081.469183}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.72219783049391, "units": "Tflops", "t": 1712623081.865471}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37579304762363, "units": "Tflops", "t": 1712623082.1061785}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623081.974656}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.0311982398, "units": "Tflops", "t": 1712623082.3478477}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.11969102606311, "units": "Tflops", "t": 1712623082.589238}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623082.4801822}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.76496175738967, "units": "Tflops", "t": 1712623082.8316133}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37950475806933, "units": "Tflops", "t": 1712623083.072312}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623082.9856522}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.99895554081445, "units": "Tflops", "t": 1712623083.3140626}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.90478334088733, "units": "Tflops", "t": 1712623083.5561328}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623083.4910765}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.45004547184301, "units": "Tflops", "t": 1712623083.7993891}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37425413416183, "units": "Tflops", "t": 1712623084.0401084}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623083.996587}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.05869891771358, "units": "Tflops", "t": 1712623084.2817082}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.3255037757806, "units": "Tflops", "t": 1712623084.525223}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623084.5020697}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.69588101013983, "units": "Tflops", "t": 1712623084.767784}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.50791543699513, "units": "Tflops", "t": 1712623085.0107996}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623085.0075552}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.36185419397465, "units": "Tflops", "t": 1712623085.2515903}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.48749273377143, "units": "Tflops", "t": 1712623085.4947727}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.32594606177697, "units": "Tflops", "t": 1712623085.7382817}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623085.5131333}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.54292184599564, "units": "Tflops", "t": 1712623085.9812646}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.36493123741121, "units": "Tflops", "t": 1712623086.2220073}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623086.0186527}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.39650186465059, "units": "Tflops", "t": 1712623086.4653745}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.56888315833132, "units": "Tflops", "t": 1712623086.708235}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623086.52411}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.32152339671589, "units": "Tflops", "t": 1712623086.951797}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.12770343199365, "units": "Tflops", "t": 1712623087.1931581}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623087.029592}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.71220456143875, "units": "Tflops", "t": 1712623087.4356759}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.61488544015931, "units": "Tflops", "t": 1712623087.6785293}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623087.5351698}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 89.75880094995466, "units": "Tflops", "t": 1712623087.923652}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.38683853583618, "units": "Tflops", "t": 1712623088.1643443}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623088.0407677}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.27970640315782, "units": "Tflops", "t": 1712623088.4080324}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.08730035039915, "units": "Tflops", "t": 1712623088.6521826}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623088.5463588}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712623089.4562054, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bf16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bf16.D1.data
new file mode 100644
index 000000000..52268b3b6
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bf16.D1.data
@@ -0,0 +1,138 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "bf16"}, "weight": 0.0, "name": "bf16", "tag": ["bf16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623065.596248, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712623065.605839}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 81.89998922773792, "units": "Tflops", "t": 1712623067.175949}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712623066.7665226}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.83491943376553, "units": "Tflops", "t": 1712623067.4109821}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.09, "temperature": null, "power": null}}, "t": 1712623067.2727697}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.45944093642903, "units": "Tflops", "t": 1712623067.6463833}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 89.59209737737582, "units": "Tflops", "t": 1712623067.8918984}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.23, "temperature": null, "power": null}}, "t": 1712623067.778479}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.4082355220014, "units": "Tflops", "t": 1712623068.1274264}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.61843057518533, "units": "Tflops", "t": 1712623068.3623729}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.42, "temperature": null, "power": null}}, "t": 1712623068.2842638}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.65445850405426, "units": "Tflops", "t": 1712623068.5972767}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.68277290357786, "units": "Tflops", "t": 1712623068.839835}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.56, "temperature": null, "power": null}}, "t": 1712623068.7900558}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.28906116362654, "units": "Tflops", "t": 1712623069.0813003}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.32440266246397, "units": "Tflops", "t": 1712623069.317004}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.75, "temperature": null, "power": null}}, "t": 1712623069.2955978}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.25599255897929, "units": "Tflops", "t": 1712623069.5531356}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37190060206905, "units": "Tflops", "t": 1712623069.7938764}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.70057323575507, "units": "Tflops", "t": 1712623070.033749}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}, "t": 1712623069.8011928}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.62636352352396, "units": "Tflops", "t": 1712623070.271347}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.78607587825098, "units": "Tflops", "t": 1712623070.5084784}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623070.3069987}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.48074237803415, "units": "Tflops", "t": 1712623070.7489982}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.59548304270619, "units": "Tflops", "t": 1712623070.9892557}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623070.8129425}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.3687635007859, "units": "Tflops", "t": 1712623071.227462}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.81502084409433, "units": "Tflops", "t": 1712623071.4648328}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623071.3187666}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.99343352019302, "units": "Tflops", "t": 1712623071.7041874}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.99428718283042, "units": "Tflops", "t": 1712623071.946012}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623071.8245184}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.866804351966, "units": "Tflops", "t": 1712623072.1855183}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.33501187657636, "units": "Tflops", "t": 1712623072.423902}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623072.3304276}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.76434574008722, "units": "Tflops", "t": 1712623072.6636903}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.42941579777474, "units": "Tflops", "t": 1712623072.9043434}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623072.8361802}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.83067553363065, "units": "Tflops", "t": 1712623073.1439433}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.45023882788266, "units": "Tflops", "t": 1712623073.3818696}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623073.3419278}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.8512517562348, "units": "Tflops", "t": 1712623073.6213996}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.77112522775298, "units": "Tflops", "t": 1712623073.8637717}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623073.848226}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.68817572299594, "units": "Tflops", "t": 1712623074.103805}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.07406394167289, "units": "Tflops", "t": 1712623074.3429682}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.25853791524695, "units": "Tflops", "t": 1712623074.581467}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623074.3541095}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.75969220740726, "units": "Tflops", "t": 1712623074.8241622}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.8577466116662, "units": "Tflops", "t": 1712623075.0637395}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623074.8601513}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.723827610859, "units": "Tflops", "t": 1712623075.303635}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.68617055797553, "units": "Tflops", "t": 1712623075.5436442}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623075.3660345}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.71443324750041, "units": "Tflops", "t": 1712623075.7836547}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.32612137432967, "units": "Tflops", "t": 1712623076.0245068}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623075.8719141}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.45534393630587, "units": "Tflops", "t": 1712623076.2652109}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.85353857239662, "units": "Tflops", "t": 1712623076.5046775}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623076.378009}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.12365203940374, "units": "Tflops", "t": 1712623076.7462792}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.2095285722104, "units": "Tflops", "t": 1712623076.9874337}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623076.8839407}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.29104899660878, "units": "Tflops", "t": 1712623077.2286203}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.9753615778755, "units": "Tflops", "t": 1712623077.4677694}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623077.3897924}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.2525727068042, "units": "Tflops", "t": 1712623077.709035}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.12104133283255, "units": "Tflops", "t": 1712623077.950453}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623077.895687}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 89.6168196188575, "units": "Tflops", "t": 1712623078.196175}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.38254865443005, "units": "Tflops", "t": 1712623078.4344354}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623078.4016285}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.28463402855289, "units": "Tflops", "t": 1712623078.6754656}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.36185419397465, "units": "Tflops", "t": 1712623078.916415}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623078.9076514}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.2271585886938, "units": "Tflops", "t": 1712623079.1604395}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.02454856964896, "units": "Tflops", "t": 1712623079.3994665}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.44419122154098, "units": "Tflops", "t": 1712623079.6401672}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623079.4136045}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.38167760654434, "units": "Tflops", "t": 1712623079.881059}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.93767660162145, "units": "Tflops", "t": 1712623080.1231346}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623079.919495}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.18968961217794, "units": "Tflops", "t": 1712623080.3644476}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.67441476407215, "units": "Tflops", "t": 1712623080.6044745}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623080.4254313}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.20204285971863, "units": "Tflops", "t": 1712623080.8457232}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.54185526113812, "units": "Tflops", "t": 1712623081.0888922}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623080.9313893}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.38258299057357, "units": "Tflops", "t": 1712623081.3296852}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.871013606828, "units": "Tflops", "t": 1712623081.56925}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623081.4375348}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.04521611735184, "units": "Tflops", "t": 1712623081.8110604}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.0302099626318, "units": "Tflops", "t": 1712623082.052828}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623081.9436045}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.44718314630637, "units": "Tflops", "t": 1712623082.293524}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.39924525933007, "units": "Tflops", "t": 1712623082.534178}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623082.4494996}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.6563768507648, "units": "Tflops", "t": 1712623082.7744324}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 89.09525105488827, "units": "Tflops", "t": 1712623083.0213134}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623082.9550524}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.7740242809246, "units": "Tflops", "t": 1712623083.2612116}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.35443394185738, "units": "Tflops", "t": 1712623083.5019965}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623083.4610865}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.82475592980865, "units": "Tflops", "t": 1712623083.744448}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.44543326208972, "units": "Tflops", "t": 1712623083.987792}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623083.9668524}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.66739917643574, "units": "Tflops", "t": 1712623084.228061}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.41056820188973, "units": "Tflops", "t": 1712623084.468703}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.02859282809659, "units": "Tflops", "t": 1712623084.710519}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623084.4727085}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 89.98244946537358, "units": "Tflops", "t": 1712623084.9550548}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.76103186549818, "units": "Tflops", "t": 1712623085.1975033}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623084.978622}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.42488441611836, "units": "Tflops", "t": 1712623085.4383972}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.75674509880973, "units": "Tflops", "t": 1712623085.6809566}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623085.4844997}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.83888922450546, "units": "Tflops", "t": 1712623085.9231915}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.04072273779174, "units": "Tflops", "t": 1712623086.1649625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623085.9915082}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.06840900689552, "units": "Tflops", "t": 1712623086.4067826}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.07830104153484, "units": "Tflops", "t": 1712623086.648295}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623086.4973695}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.4584728390808, "units": "Tflops", "t": 1712623086.891674}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.03569067920935, "units": "Tflops", "t": 1712623087.1334126}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623087.0033453}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.10385041569235, "units": "Tflops", "t": 1712623087.3751059}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.83763672964976, "units": "Tflops", "t": 1712623087.6172595}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623087.508977}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 89.26313257943669, "units": "Tflops", "t": 1712623087.8639495}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.01655596561953, "units": "Tflops", "t": 1712623088.1057642}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623088.014946}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.85472731732538, "units": "Tflops", "t": 1712623088.347928}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37995742631371, "units": "Tflops", "t": 1712623088.5888007}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623088.5208538}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712623089.3806674, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp16.D0.data
new file mode 100644
index 000000000..b2919edb5
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp16.D0.data
@@ -0,0 +1,72 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp16", "tag": ["convnext_large-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623364.996635, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623367.3813853}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24374.375, 24512.0], "load": 0.07, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623372.918923, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp16.D1.data
new file mode 100644
index 000000000..c0d50b8c2
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp16.D1.data
@@ -0,0 +1,72 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp16", "tag": ["convnext_large-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623367.362583, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623367.3895981}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [10830.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24374.375, 24512.0], "load": 0.09, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623372.9969852, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp32.D0.data
new file mode 100644
index 000000000..9f2bfa90c
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp32.D0.data
@@ -0,0 +1,72 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp32", "tag": ["convnext_large-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623354.367836, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623356.7626548}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3826.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24408.375, 24512.0], "load": 0.08, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623362.3638864, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp32.D1.data
new file mode 100644
index 000000000..332ff3dcd
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp32.D1.data
@@ -0,0 +1,72 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp32", "tag": ["convnext_large-fp32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623356.745656, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623356.771129}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [7694.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24408.375, 24512.0], "load": 0.07, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623362.598034, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32-fp16.D0.data
new file mode 100644
index 000000000..609205a4c
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32-fp16.D0.data
@@ -0,0 +1,72 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 3.0, "name": "convnext_large-tf32-fp16", "tag": ["convnext_large-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623386.059841, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623388.4732454}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24374.375, 24512.0], "load": 0.09, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623394.4441423, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32-fp16.D1.data
new file mode 100644
index 000000000..04fcbc148
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32-fp16.D1.data
@@ -0,0 +1,72 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 3.0, "name": "convnext_large-tf32-fp16", "tag": ["convnext_large-tf32-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623388.455584, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623388.482006}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3444.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24374.375, 24512.0], "load": 0.08, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623394.365653, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32.D0.data
new file mode 100644
index 000000000..06c32aaa9
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32.D0.data
@@ -0,0 +1,72 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-tf32", "tag": ["convnext_large-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623375.385025, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623377.7582028}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3826.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24408.375, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623383.6454434, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32.D1.data
new file mode 100644
index 000000000..41947d735
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32.D1.data
@@ -0,0 +1,72 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-tf32", "tag": ["convnext_large-tf32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623377.740523, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623377.766428}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3826.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24408.375, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623383.4317787, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large-multi.0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large-multi.0.data
new file mode 100644
index 000000000..4325d5dc6
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large-multi.0.data
@@ -0,0 +1,232 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "multigpu", "transformer", "vision"], "weight": 5.0, "name": "davit_large-multi", "tag": ["davit_large-multi", "0"], "job-number": 0, "devices": ["0", "1"]}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623520.930649, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712623520.9466906}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 1, total 2, device cuda:1.\n", "pipe": "stderr"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 0, total 2, device cuda:0.\n", "pipe": "stderr"}
+{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.01) calculated from base learning rate (0.01) and global batch size (256) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch DistributedDataParallel.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7064.3125, 24512.0], "load": 0.06, "temperature": null, "power": null}, "1": {"memory": [7064.3125, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24346.3125, 24512.0], "load": 0.39, "temperature": null, "power": null}, "1": {"memory": [24346.3125, 24512.0], "load": 0.41, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 165.69 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"}
+{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"}
+{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 186, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 41, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.act(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 165.69 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 165.69 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"}
+{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"}
+{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 186, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 41, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.act(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 165.69 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "line", "data": "[2024-04-09 00:45:31,977] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: 1) local_rank: 0 (pid: 51458) of binary: /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/python\n", "pipe": "stderr"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/torchrun\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py\", line 346, in wrapper\n", "pipe": "stderr"}
+{"event": "line", "data": " return f(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 806, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " run(args)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 797, in run\n", "pipe": "stderr"}
+{"event": "line", "data": " elastic_launch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 134, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " return launch_agent(self._config, self._entrypoint, list(args))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 264, in launch_agent\n", "pipe": "stderr"}
+{"event": "line", "data": " raise ChildFailedError(\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.distributed.elastic.multiprocessing.errors.ChildFailedError: \n", "pipe": "stderr"}
+{"event": "line", "data": "============================================================\n", "pipe": "stderr"}
+{"event": "line", "data": "voir FAILED\n", "pipe": "stderr"}
+{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"}
+{"event": "line", "data": "Failures:\n", "pipe": "stderr"}
+{"event": "line", "data": "[1]:\n", "pipe": "stderr"}
+{"event": "line", "data": " time : 2024-04-09_00:45:31\n", "pipe": "stderr"}
+{"event": "line", "data": " host : delicatemastodon.internal.cloudapp.net\n", "pipe": "stderr"}
+{"event": "line", "data": " rank : 1 (local_rank: 1)\n", "pipe": "stderr"}
+{"event": "line", "data": " exitcode : 1 (pid: 51459)\n", "pipe": "stderr"}
+{"event": "line", "data": " error_file: \n", "pipe": "stderr"}
+{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"}
+{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"}
+{"event": "line", "data": "Root Cause (first observed failure):\n", "pipe": "stderr"}
+{"event": "line", "data": "[0]:\n", "pipe": "stderr"}
+{"event": "line", "data": " time : 2024-04-09_00:45:31\n", "pipe": "stderr"}
+{"event": "line", "data": " host : delicatemastodon.internal.cloudapp.net\n", "pipe": "stderr"}
+{"event": "line", "data": " rank : 0 (local_rank: 0)\n", "pipe": "stderr"}
+{"event": "line", "data": " exitcode : 1 (pid: 51458)\n", "pipe": "stderr"}
+{"event": "line", "data": " error_file: \n", "pipe": "stderr"}
+{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"}
+{"event": "line", "data": "============================================================\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712623532.3300037, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large.D0.data
new file mode 100644
index 000000000..bea27f08b
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large.D0.data
@@ -0,0 +1,94 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "transformer", "vision"], "weight": 1.0, "name": "davit_large", "tag": ["davit_large", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623508.254633, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712623510.6078093}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6024.375, 24512.0], "load": 0.1, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24418.375, 24512.0], "load": 0.36, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 93.62 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 353, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 40, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.fc1(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 93.62 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712623518.324035, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large.D1.data
new file mode 100644
index 000000000..1806a17e7
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large.D1.data
@@ -0,0 +1,94 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "transformer", "vision"], "weight": 1.0, "name": "davit_large", "tag": ["davit_large", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623510.591168, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/davit_large.D1", "--checkpoint-hist", "1"], "time": 1712623510.6157527}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [6024.375, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24418.375, 24512.0], "load": 0.33, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 93.62 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 353, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 40, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.fc1(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 93.62 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/davit_large.D1", "--checkpoint-hist", "1"], "time": 1712623518.567284, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/dlrm.0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/dlrm.0.data
new file mode 100644
index 000000000..e687b835d
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/dlrm.0.data
@@ -0,0 +1,271 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "dlrm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "tags": ["nlp", "rl"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm", "plan": {"method": "njobs", "n": 1}, "argv": {"--num-batches": 1000, "--data-generation": "random", "--arch-mlp-bot": "512-512-64", "--arch-mlp-top": "1024-1024-1024-1", "--arch-sparse-feature-size": 64, "--arch-embedding-size": "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup": 100, "--arch-interaction-op": "dot", "--numpy-rand-seed": "727", "--print-freq": 999999, "--mini-batch-size": 16384, "--test-mini-batch-size": 16384, "--test-num-workers": 0, "--use-gpu": true}, "weight": 1.0, "name": "dlrm", "tag": ["dlrm", "0"], "job-number": 0, "devices": ["0", "1"]}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623684.095908, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712623684.113308}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "line", "data": "Unable to import mlperf_logging, No module named 'mlperf_logging'\n", "pipe": "stdout"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:347: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "world size: 1, current rank: 0, local rank: 0\n", "pipe": "stdout"}
+{"event": "line", "data": "Using 2 GPU(s)...\n", "pipe": "stdout"}
+{"event": "line", "data": "time/loss/accuracy (if enabled):\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2642.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2642.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2642.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0887361615896225}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4954.375, 24512.0], "load": 0.03, "temperature": null, "power": null}, "1": {"memory": [4936.375, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08788755536079407}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4962.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [4944.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08937834203243256}, "pipe": "data"}
+{"event": "data", "data": {"rate": 374858.6443835655, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4962.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5348.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08813147246837616}, "pipe": "data"}
+{"event": "data", "data": {"rate": 370580.520809485, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5164.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5348.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08771650493144989}, "pipe": "data"}
+{"event": "data", "data": {"rate": 376167.0269763922, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5164.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5348.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08742949366569519}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5348.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 364000.0809580258, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08757737278938293}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5348.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 372341.30434767785, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08654382824897766}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 368779.28609246074, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0858154445886612}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 366422.3478270399, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08628799766302109}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 376153.86529779475, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08730762451887131}, "pipe": "data"}
+{"event": "data", "data": {"rate": 367143.35022923263, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08590050786733627}, "pipe": "data"}
+{"event": "data", "data": {"rate": 364914.34947502473, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08602667599916458}, "pipe": "data"}
+{"event": "data", "data": {"rate": 372269.73132390995, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08581672608852386}, "pipe": "data"}
+{"event": "data", "data": {"rate": 359851.8650820826, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08588778972625732}, "pipe": "data"}
+{"event": "data", "data": {"rate": 375225.8592177494, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08632193505764008}, "pipe": "data"}
+{"event": "data", "data": {"rate": 368972.10348847765, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08664406836032867}, "pipe": "data"}
+{"event": "data", "data": {"rate": 371890.44802879787, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0852133184671402}, "pipe": "data"}
+{"event": "data", "data": {"rate": 379018.6350674685, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08576950430870056}, "pipe": "data"}
+{"event": "data", "data": {"rate": 367169.6462036745, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08471724390983582}, "pipe": "data"}
+{"event": "data", "data": {"rate": 370772.46632440516, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08532554656267166}, "pipe": "data"}
+{"event": "data", "data": {"rate": 368970.87370857096, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08427020162343979}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 373874.33509459393, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08591149747371674}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 379221.4424543034, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0845126137137413}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 365862.1577252217, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.0840827077627182}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 372515.69896520715, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08428709208965302}, "pipe": "data"}
+{"event": "data", "data": {"rate": 378647.5418843599, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08434845507144928}, "pipe": "data"}
+{"event": "data", "data": {"rate": 373454.0032267848, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08465415984392166}, "pipe": "data"}
+{"event": "data", "data": {"rate": 376229.0650822549, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08376755565404892}, "pipe": "data"}
+{"event": "data", "data": {"rate": 375583.30719257623, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.083040751516819}, "pipe": "data"}
+{"event": "data", "data": {"rate": 377734.8891289079, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08479100465774536}, "pipe": "data"}
+{"event": "data", "data": {"rate": 364603.27929761703, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08379324525594711}, "pipe": "data"}
+{"event": "data", "data": {"rate": 373809.7192626037, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08349813520908356}, "pipe": "data"}
+{"event": "data", "data": {"rate": 370005.54059810366, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08437865972518921}, "pipe": "data"}
+{"event": "data", "data": {"rate": 377784.6311705755, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08468881249427795}, "pipe": "data"}
+{"event": "data", "data": {"rate": 379094.502331452, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08403709530830383}, "pipe": "data"}
+{"event": "data", "data": {"rate": 366551.07764204656, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08420669287443161}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 376381.6127290497, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08320620656013489}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 380564.84333429433, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08308559656143188}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 373736.84746947035, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08385886251926422}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 371668.88585173787, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08402976393699646}, "pipe": "data"}
+{"event": "data", "data": {"rate": 368483.49676804396, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08415202051401138}, "pipe": "data"}
+{"event": "data", "data": {"rate": 373710.020138691, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08402085304260254}, "pipe": "data"}
+{"event": "data", "data": {"rate": 379947.3421368536, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08228246867656708}, "pipe": "data"}
+{"event": "data", "data": {"rate": 366047.7077859737, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08508101850748062}, "pipe": "data"}
+{"event": "data", "data": {"rate": 370532.02079420997, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08269783854484558}, "pipe": "data"}
+{"event": "data", "data": {"rate": 379356.10922520014, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08433524519205093}, "pipe": "data"}
+{"event": "data", "data": {"rate": 370239.9317008563, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08258543908596039}, "pipe": "data"}
+{"event": "data", "data": {"rate": 373759.9099472217, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08313082903623581}, "pipe": "data"}
+{"event": "data", "data": {"rate": 368856.8306436097, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08317263424396515}, "pipe": "data"}
+{"event": "data", "data": {"rate": 376142.9238422384, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08357995748519897}, "pipe": "data"}
+{"event": "data", "data": {"rate": 373656.0361630807, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08405735343694687}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 371348.51300464355, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08272015303373337}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 375528.5569022488, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08338482677936554}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 378121.14088361216, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08422383666038513}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 366997.2852863163, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08405454456806183}, "pipe": "data"}
+{"event": "data", "data": {"rate": 379590.9092425567, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08417406678199768}, "pipe": "data"}
+{"event": "data", "data": {"rate": 368304.4939420048, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08274278044700623}, "pipe": "data"}
+{"event": "data", "data": {"rate": 376821.2345941489, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08461865782737732}, "pipe": "data"}
+{"event": "data", "data": {"rate": 377166.8823259815, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08379694819450378}, "pipe": "data"}
+{"event": "data", "data": {"rate": 375817.76031607593, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08401922881603241}, "pipe": "data"}
+{"event": "data", "data": {"rate": 377850.5414766152, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08402208983898163}, "pipe": "data"}
+{"event": "data", "data": {"rate": 378040.1672292429, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"loss": 0.08343112468719482}, "pipe": "data"}
+{"event": "data", "data": {"rate": 372000.0485888735, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712623900.470933, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/focalnet.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/focalnet.D0.data
new file mode 100644
index 000000000..7af368556
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/focalnet.D0.data
@@ -0,0 +1,253 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "focalnet_base_lrf"}, "tags": ["classification", "convnet", "vision"], "weight": 2.0, "name": "focalnet", "tag": ["focalnet", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623534.662914, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712623537.0178125}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model focalnet_base_lrf created, param count:88749768\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.9\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.05) calculated from base learning rate (0.1) and global batch size (128) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 7.004446029663086}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5626.375, 24512.0], "load": 0.35, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.004 (7.00) Time: 17.573s, 7.28/s (17.573s, 7.28/s) LR: 1.000e-05 Data: 0.585 (0.585)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23512.375, 24512.0], "load": 0.94, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [13582.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [11108.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [10918.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [10618.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.006705284118652}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01315975189209}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.036317348480225}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [8110.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.982916831970215}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.996298789978027}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [22296.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 144.68807615526669, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.995458602905273}, "pipe": "data"}
+{"event": "data", "data": {"rate": 131.29344970804513, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.995474815368652}, "pipe": "data"}
+{"event": "data", "data": {"rate": 167.7826801546604, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24204.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 107.51861469146162, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97037935256958}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.060278415679932}, "pipe": "data"}
+{"event": "data", "data": {"rate": 171.1188057875757, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [16206.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.003751754760742}, "pipe": "data"}
+{"event": "data", "data": {"rate": 136.69792901313284, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.941399097442627}, "pipe": "data"}
+{"event": "data", "data": {"rate": 169.9522817675844, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 108.3922863171404, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24216.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.961507320404053}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.990056037902832}, "pipe": "data"}
+{"event": "data", "data": {"rate": 172.62301526721913, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.029509544372559}, "pipe": "data"}
+{"event": "data", "data": {"rate": 137.7681942586302, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24366.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94654655456543}, "pipe": "data"}
+{"event": "data", "data": {"rate": 170.66232478957426, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 107.81551083500315, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.032044410705566}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24422.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9754743576049805}, "pipe": "data"}
+{"event": "data", "data": {"rate": 171.29246278175938, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.979887008666992}, "pipe": "data"}
+{"event": "data", "data": {"rate": 137.487251266376, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.980226993560791}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [15344.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 169.98081199491273, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 106.45916019767995, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.979134559631348}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.005 (7.00) Time: 0.729s, 175.55/s (1.385s, 92.41/s) LR: 1.000e-05 Data: 0.000 (0.038)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 168.21244220072643, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24446.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.867 (0.867) Loss: 6.9616 (6.9616) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 1.223 (0.302) Loss: 6.8639 (6.9459) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 0.6541)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D0/20240409-004541-focalnet_base_lrf-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 175.47127320579483, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [8358.375, 24512.0], "load": 0.81, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [8358.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7198.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.020693778991699}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.021 (7.02) Time: 1.296s, 98.77/s (1.296s, 98.77/s) LR: 1.001e-02 Data: 0.476 (0.476)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [11702.375, 24512.0], "load": 0.51, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.960958480834961}, "pipe": "data"}
+{"event": "data", "data": {"rate": 132.79107998992617, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 146.87649199048235, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.003782272338867}, "pipe": "data"}
+{"event": "data", "data": {"rate": 154.12925408812737, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [15858.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.034578800201416}, "pipe": "data"}
+{"event": "data", "data": {"rate": 125.32590321875432, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.061990261077881}, "pipe": "data"}
+{"event": "data", "data": {"rate": 170.3416794985076, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 104.15959893606829, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24220.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.065667152404785}, "pipe": "data"}
+{"event": "data", "data": {"rate": 132.51997500760237, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.019161224365234}, "pipe": "data"}
+{"event": "data", "data": {"rate": 167.85865633898098, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24222.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.061379432678223}, "pipe": "data"}
+{"event": "data", "data": {"rate": 133.90740532970187, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.077938556671143}, "pipe": "data"}
+{"event": "data", "data": {"rate": 168.92309339876343, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 99.7747514053412, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.037508964538574}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24316.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 144.70835321349125, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.999046325683594}, "pipe": "data"}
+{"event": "data", "data": {"rate": 141.75601582491066, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 116.60003774616504, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.063668727874756}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24392.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.077404499053955}, "pipe": "data"}
+{"event": "data", "data": {"rate": 169.8169410721702, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 110.4658666551973, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.036367416381836}, "pipe": "data"}
+{"event": "data", "data": {"rate": 140.39447506543613, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24402.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.993802070617676}, "pipe": "data"}
+{"event": "data", "data": {"rate": 143.0095466626196, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 112.85871090927047, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.007924556732178}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.098423957824707}, "pipe": "data"}
+{"event": "data", "data": {"rate": 170.36484490014465, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [15110.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 112.09772856530246, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.043365478515625}, "pipe": "data"}
+{"event": "data", "data": {"rate": 140.8971201876894, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1979475021362305}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24198.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 141.75450732044862, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.147006034851074}, "pipe": "data"}
+{"event": "data", "data": {"rate": 125.32773343040974, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.981863021850586}, "pipe": "data"}
+{"event": "data", "data": {"rate": 169.9606144691258, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [11964.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 7.108 (7.05) Time: 0.755s, 169.50/s (0.909s, 140.88/s) LR: 1.001e-02 Data: 0.000 (0.035)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 106.75002776334706, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.659 (0.659) Loss: 6.8922 (6.8922) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.068 (0.261) Loss: 6.9392 (6.9700) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D0/20240409-004541-focalnet_base_lrf-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 169.48692602613613, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24406.375, 24512.0], "load": 0.84, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24406.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24406.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.995840072631836}, "pipe": "data"}
+{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.996 (7.00) Time: 1.491s, 85.87/s (1.491s, 85.87/s) LR: 2.001e-02 Data: 0.495 (0.495)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24272.375, 24512.0], "load": 0.53, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.034093856811523}, "pipe": "data"}
+{"event": "data", "data": {"rate": 141.82756283914355, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.15028715133667}, "pipe": "data"}
+{"event": "data", "data": {"rate": 166.4497209118674, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 106.89810855455488, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24152.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.148109436035156}, "pipe": "data"}
+{"event": "data", "data": {"rate": 165.41324457517086, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.019067764282227}, "pipe": "data"}
+{"event": "data", "data": {"rate": 151.4865170129708, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.009698867797852}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24364.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 133.6772144864222, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.025642395019531}, "pipe": "data"}
+{"event": "data", "data": {"rate": 167.09900989828867, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 104.23724990928713, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.160573482513428}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24392.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 169.11552486688353, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0108962059021}, "pipe": "data"}
+{"event": "data", "data": {"rate": 151.56588464271024, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.101762771606445}, "pipe": "data"}
+{"event": "data", "data": {"rate": 131.59627105434186, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.141188621520996}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7624.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 167.16766598496127, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 102.5111027633081, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.052355766296387}, "pipe": "data"}
+{"event": "data", "data": {"rate": 165.63165730081326, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.292587757110596}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [19852.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 148.08695207218898, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.112408638000488}, "pipe": "data"}
+{"event": "data", "data": {"rate": 132.39288173908304, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "stop", "data": null, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712623653.885088, "return_code": -15}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/focalnet.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/focalnet.D1.data
new file mode 100644
index 000000000..ef395525a
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/focalnet.D1.data
@@ -0,0 +1,254 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "focalnet_base_lrf"}, "tags": ["classification", "convnet", "vision"], "weight": 2.0, "name": "focalnet", "tag": ["focalnet", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623537.001089, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D1", "--checkpoint-hist", "1"], "time": 1712623537.025442}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model focalnet_base_lrf created, param count:88749768\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.9\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.05) calculated from base learning rate (0.1) and global batch size (128) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 7.004453659057617}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5626.375, 24512.0], "load": 0.37, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.004 (7.00) Time: 17.579s, 7.28/s (17.579s, 7.28/s) LR: 1.000e-05 Data: 0.597 (0.597)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23512.375, 24512.0], "load": 0.93, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [13582.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [11038.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [10918.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [10618.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.006725788116455}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.013141632080078}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [16210.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.036336421966553}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.982904434204102}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9962921142578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [19750.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99544620513916}, "pipe": "data"}
+{"event": "data", "data": {"rate": 138.93248807748276, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 129.1306614863104, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9885573387146}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24470.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 168.94909915069914, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.060260772705078}, "pipe": "data"}
+{"event": "data", "data": {"rate": 138.86194686864405, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0094709396362305}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24148.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 152.98267604456046, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.941376686096191}, "pipe": "data"}
+{"event": "data", "data": {"rate": 137.91001271685465, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.056238651275635}, "pipe": "data"}
+{"event": "data", "data": {"rate": 168.32388235499852, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [17760.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 108.85460374429937, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9900665283203125}, "pipe": "data"}
+{"event": "data", "data": {"rate": 169.8606538571421, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.997450828552246}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24162.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.946535587310791}, "pipe": "data"}
+{"event": "data", "data": {"rate": 137.4970846455546, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 128.51496073280597, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.032040119171143}, "pipe": "data"}
+{"event": "data", "data": {"rate": 168.31691174697158, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24430.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.990649223327637}, "pipe": "data"}
+{"event": "data", "data": {"rate": 132.2175152901551, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.980195045471191}, "pipe": "data"}
+{"event": "data", "data": {"rate": 174.88995392938844, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24434.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 137.6527772551722, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9791364669799805}, "pipe": "data"}
+{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.005 (7.00) Time: 0.729s, 175.49/s (1.334s, 95.94/s) LR: 1.000e-05 Data: 0.001 (0.035)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 175.75539788439644, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.864 (0.864) Loss: 6.9615 (6.9615) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 1.242 (0.301) Loss: 6.8639 (6.9459) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 0.6541)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D1/20240409-004541-focalnet_base_lrf-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 175.452445595315, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [7784.375, 24512.0], "load": 0.81, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [7784.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [7784.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [6628.375, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.020674705505371}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.021 (7.02) Time: 1.261s, 101.51/s (1.261s, 101.51/s) LR: 1.001e-02 Data: 0.447 (0.447)\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 157.08403145219927, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0176897048950195}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24408.375, 24512.0], "load": 0.68, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.003789901733398}, "pipe": "data"}
+{"event": "data", "data": {"rate": 171.26652733612843, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 115.46119741767964, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.034580230712891}, "pipe": "data"}
+{"event": "data", "data": {"rate": 141.8739121519906, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.062034606933594}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [14934.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 156.76215623989577, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.065652847290039}, "pipe": "data"}
+{"event": "data", "data": {"rate": 129.7413158045572, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.019181251525879}, "pipe": "data"}
+{"event": "data", "data": {"rate": 172.05059022868997, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24202.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 105.44917240280712, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.061359405517578}, "pipe": "data"}
+{"event": "data", "data": {"rate": 149.53084341574348, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.077919006347656}, "pipe": "data"}
+{"event": "data", "data": {"rate": 148.3702948852656, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [11638.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.037487983703613}, "pipe": "data"}
+{"event": "data", "data": {"rate": 130.77690977481686, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.999039173126221}, "pipe": "data"}
+{"event": "data", "data": {"rate": 172.7643045105407, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24208.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 107.16275404302822, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.063638687133789}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.077393054962158}, "pipe": "data"}
+{"event": "data", "data": {"rate": 173.11253852392164, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24190.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.036355495452881}, "pipe": "data"}
+{"event": "data", "data": {"rate": 138.1555736898298, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.993810653686523}, "pipe": "data"}
+{"event": "data", "data": {"rate": 161.2342676671038, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 106.62639663117432, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.007869243621826}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24460.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 171.4367936596502, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.098379135131836}, "pipe": "data"}
+{"event": "data", "data": {"rate": 150.72752419516488, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.043417930603027}, "pipe": "data"}
+{"event": "data", "data": {"rate": 139.25212880726397, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24412.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.197956085205078}, "pipe": "data"}
+{"event": "data", "data": {"rate": 164.70451497242468, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 106.06527356039646, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1470232009887695}, "pipe": "data"}
+{"event": "data", "data": {"rate": 163.75250302897035, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.981897354125977}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [13198.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 144.6034434368278, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 7.108 (7.05) Time: 0.752s, 170.13/s (0.888s, 144.16/s) LR: 1.001e-02 Data: 0.000 (0.034)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 0/32] Time: 0.687 (0.687) Loss: 6.8922 (6.8922) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"}
+{"event": "line", "data": "Test: [ 32/32] Time: 0.067 (0.261) Loss: 6.9395 (6.9699) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"}
+{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"}
+{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D1/20240409-004541-focalnet_base_lrf-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"}
+{"event": "line", "data": "\n", "pipe": "stderr"}
+{"event": "data", "data": {"rate": 140.305475448802, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24408.375, 24512.0], "load": 0.82, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24408.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24408.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.995878219604492}, "pipe": "data"}
+{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.996 (7.00) Time: 1.488s, 86.01/s (1.488s, 86.01/s) LR: 2.001e-02 Data: 0.494 (0.494)\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24154.375, 24512.0], "load": 0.51, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 111.41416305057494, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.034097671508789}, "pipe": "data"}
+{"event": "data", "data": {"rate": 162.86804178109608, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.150274276733398}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24180.375, 24512.0], "load": 0.94, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.148129940032959}, "pipe": "data"}
+{"event": "data", "data": {"rate": 137.63683937233148, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.019026756286621}, "pipe": "data"}
+{"event": "data", "data": {"rate": 167.7525285656443, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [18326.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 106.25671577205398, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.009702205657959}, "pipe": "data"}
+{"event": "data", "data": {"rate": 169.2617637858093, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.025614261627197}, "pipe": "data"}
+{"event": "data", "data": {"rate": 147.8443292493536, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24196.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1605610847473145}, "pipe": "data"}
+{"event": "data", "data": {"rate": 137.3981708410155, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.010904312133789}, "pipe": "data"}
+{"event": "data", "data": {"rate": 159.7444332677976, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 106.39428851055085, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.101785659790039}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24294.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 168.3947192426414, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.141188144683838}, "pipe": "data"}
+{"event": "data", "data": {"rate": 149.41564957731674, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.052373886108398}, "pipe": "data"}
+{"event": "data", "data": {"rate": 138.55172088573545, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24388.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.292572975158691}, "pipe": "data"}
+{"event": "data", "data": {"rate": 165.284024480878, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"rate": 107.28100103801494, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.112317085266113}, "pipe": "data"}
+{"event": "data", "data": {"rate": 165.56687318961033, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24458.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.083057880401611}, "pipe": "data"}
+{"event": "data", "data": {"rate": 145.01590332764957, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.190218448638916}, "pipe": "data"}
+{"event": "data", "data": {"rate": 137.08163325423587, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24458.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"rate": 130.51220037123747, "units": "items/s", "task": "train"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "stop", "data": null, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D1", "--checkpoint-hist", "1"], "time": 1712623655.5555615, "return_code": -15}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp16.D0.data
new file mode 100644
index 000000000..a65899a70
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp16.D0.data
@@ -0,0 +1,222 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 30, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp16"}, "weight": 0.0, "name": "fp16", "tag": ["fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712622992.209855, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712622994.5857365}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 92.08166284929595, "units": "Tflops", "t": 1712622996.6545672}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712622995.7894864}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.06, "temperature": null, "power": null}}, "t": 1712622996.2953954}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.46340304625043, "units": "Tflops", "t": 1712622997.3536675}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.26, "temperature": null, "power": null}}, "t": 1712622996.8009908}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.39, "temperature": null, "power": null}}, "t": 1712622997.3064864}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.47707859990244, "units": "Tflops", "t": 1712622998.0521567}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.59, "temperature": null, "power": null}}, "t": 1712622997.8122258}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.27063050012336, "units": "Tflops", "t": 1712622998.7460208}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.72, "temperature": null, "power": null}}, "t": 1712622998.3176758}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.33750972713605, "units": "Tflops", "t": 1712622999.4454556}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.92, "temperature": null, "power": null}}, "t": 1712622998.823255}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712622999.3286662}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.02872880136583, "units": "Tflops", "t": 1712623000.1472428}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712622999.8340993}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.65276098093757, "units": "Tflops", "t": 1712623000.8451257}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623000.3395743}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712623000.8450837}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.49708300157378, "units": "Tflops", "t": 1712623001.543527}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623001.3505857}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.8620390214926, "units": "Tflops", "t": 1712623002.2465084}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623001.8559945}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.21987534073035, "units": "Tflops", "t": 1712623002.9469368}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623002.3616176}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623002.8672562}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.4229478896861, "units": "Tflops", "t": 1712623003.645765}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623003.372737}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.79112194850264, "units": "Tflops", "t": 1712623004.349402}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623003.8782525}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.95172154878891, "units": "Tflops", "t": 1712623005.051886}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623004.383736}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623004.889268}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.08531945543064, "units": "Tflops", "t": 1712623005.7532241}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623005.3947623}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.88856898936464, "units": "Tflops", "t": 1712623006.4561307}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623005.9005044}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623006.4059918}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.7232324609725, "units": "Tflops", "t": 1712623007.1601987}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623006.9113808}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.32415085519008, "units": "Tflops", "t": 1712623007.867364}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623007.416881}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.75257457843477, "units": "Tflops", "t": 1712623008.5711486}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623007.9224908}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623008.4280367}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.5307107343003, "units": "Tflops", "t": 1712623009.2767794}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623008.933456}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.5784109409658, "units": "Tflops", "t": 1712623009.9818826}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623009.4390833}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623009.9445107}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.56312766178485, "units": "Tflops", "t": 1712623010.6871383}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623010.4499366}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.55199268141921, "units": "Tflops", "t": 1712623011.392611}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623010.9555047}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.32701524311578, "units": "Tflops", "t": 1712623012.0996048}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623011.460999}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623011.9666839}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.16349201889635, "units": "Tflops", "t": 1712623012.8078792}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623012.4722576}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.21251390623921, "units": "Tflops", "t": 1712623013.5158713}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623012.9777634}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623013.483183}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.31511816795683, "units": "Tflops", "t": 1712623014.2230046}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623013.988673}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.85180387635134, "units": "Tflops", "t": 1712623014.9336193}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623014.4942677}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.5612638274786, "units": "Tflops", "t": 1712623015.6466072}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623014.999742}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623015.5052178}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.50547060603141, "units": "Tflops", "t": 1712623016.3599243}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623016.0106668}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.92130706707509, "units": "Tflops", "t": 1712623017.0700045}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623016.5161417}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623017.0217223}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.58808583573641, "units": "Tflops", "t": 1712623017.7826748}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623017.5271783}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.75298248214362, "units": "Tflops", "t": 1712623018.4941735}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623018.0326412}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.68127826127144, "units": "Tflops", "t": 1712623019.2060993}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623018.5382078}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623019.0436742}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.7250083712347, "units": "Tflops", "t": 1712623019.9177196}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623019.5492127}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.67662194482206, "units": "Tflops", "t": 1712623020.6296773}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623020.0548851}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623020.5604424}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.68677331651891, "units": "Tflops", "t": 1712623021.341746}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623021.0659506}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.69127540101898, "units": "Tflops", "t": 1712623022.0535996}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623021.5714943}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.54847774134608, "units": "Tflops", "t": 1712623022.766544}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623022.077117}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623022.582522}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.30279362577772, "units": "Tflops", "t": 1712623023.4814177}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623023.088055}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.52375148821281, "units": "Tflops", "t": 1712623024.1945548}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623023.5934694}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623024.0988743}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.40550219713224, "units": "Tflops", "t": 1712623024.9087641}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623024.6044054}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.25496976324989, "units": "Tflops", "t": 1712623025.6239767}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623025.1099114}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623025.6154466}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.08586117079452, "units": "Tflops", "t": 1712623026.340537}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623026.120936}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.22249996438542, "units": "Tflops", "t": 1712623027.055997}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623026.62638}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.19742531544858, "units": "Tflops", "t": 1712623027.7716515}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623027.132516}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623027.6378732}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.89743701403441, "units": "Tflops", "t": 1712623028.4896834}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623028.143282}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.86717035881608, "units": "Tflops", "t": 1712623029.20807}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623028.6488526}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623029.1543324}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.8183833653662, "units": "Tflops", "t": 1712623029.918984}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623029.6596901}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.64816650002453, "units": "Tflops", "t": 1712623030.6311603}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623030.1650763}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.00257922201564, "units": "Tflops", "t": 1712623031.3483407}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623030.6706643}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623031.1761262}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.5145328138153, "units": "Tflops", "t": 1712623032.0615726}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623031.6814826}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.56993437018025, "units": "Tflops", "t": 1712623032.7743568}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623032.186955}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623032.6925025}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.24140715247712, "units": "Tflops", "t": 1712623033.48984}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623033.1978936}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.4382554150294, "units": "Tflops", "t": 1712623034.203636}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623033.7033541}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.44458647028648, "units": "Tflops", "t": 1712623034.917378}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623034.208746}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623034.7142591}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.39041451155936, "units": "Tflops", "t": 1712623035.6315746}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623035.2196386}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.03957821822611, "units": "Tflops", "t": 1712623036.3484507}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623035.7250361}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623036.2304409}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.33344048427077, "units": "Tflops", "t": 1712623037.0630863}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623036.7359378}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.03293518162651, "units": "Tflops", "t": 1712623037.780027}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623037.241323}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623037.7466772}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.24017718095043, "units": "Tflops", "t": 1712623038.495559}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623038.2522306}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.90543415773632, "units": "Tflops", "t": 1712623039.2134862}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623038.757848}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.87717567547645, "units": "Tflops", "t": 1712623039.9316294}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623039.263311}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623039.7686636}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.06052383637027, "units": "Tflops", "t": 1712623040.6483762}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623040.27405}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.9141654533217, "units": "Tflops", "t": 1712623041.3662302}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623040.7793975}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623041.2849655}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.47148845525015, "units": "Tflops", "t": 1712623042.0876143}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623041.7904794}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.91886760736999, "units": "Tflops", "t": 1712623042.8054354}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623042.2958736}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623042.8012705}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.84027662349271, "units": "Tflops", "t": 1712623043.5239134}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623043.3066254}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.80234116740215, "units": "Tflops", "t": 1712623044.242781}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623043.811981}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.41917551471167, "units": "Tflops", "t": 1712623044.9645212}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623044.317373}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623044.8227198}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.80706234746496, "units": "Tflops", "t": 1712623045.6832542}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623045.328158}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.67881903652602, "units": "Tflops", "t": 1712623046.402969}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623045.8335593}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623046.3389757}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.56526342379955, "units": "Tflops", "t": 1712623047.1236045}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623046.8445172}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.72425329101834, "units": "Tflops", "t": 1712623047.842945}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623047.3498883}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.59254204339663, "units": "Tflops", "t": 1712623048.5633183}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623047.8554265}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623048.3607957}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.5693844841155, "units": "Tflops", "t": 1712623049.2839131}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623048.8662121}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.5658391378761, "units": "Tflops", "t": 1712623050.004513}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623049.3716223}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623049.8770056}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.56150630972388, "units": "Tflops", "t": 1712623050.7251828}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623050.3823702}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.5623243647558, "units": "Tflops", "t": 1712623051.4459584}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623050.8878477}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623051.393243}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.56438471627182, "units": "Tflops", "t": 1712623052.1665988}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623051.8986564}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.19080156794898, "units": "Tflops", "t": 1712623052.8901527}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623052.4040213}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.43799644415854, "units": "Tflops", "t": 1712623053.6117477}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623052.9094834}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623053.4148254}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37651726013047, "units": "Tflops", "t": 1712623054.3338902}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623053.9203157}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.17076047220235, "units": "Tflops", "t": 1712623055.0576012}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623054.4256735}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623054.9310958}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.09926126797762, "units": "Tflops", "t": 1712623055.7819135}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623055.4368086}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.34888462246634, "units": "Tflops", "t": 1712623056.5042105}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623055.9421837}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623056.4476187}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.25130877824122, "units": "Tflops", "t": 1712623057.227316}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623056.9532337}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.00311557105228, "units": "Tflops", "t": 1712623057.9523687}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623057.4586794}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.92724736926372, "units": "Tflops", "t": 1712623058.6781547}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623057.9647615}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623058.470128}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.9462249769573, "units": "Tflops", "t": 1712623059.4036999}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623058.9755013}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.01688529014208, "units": "Tflops", "t": 1712623060.1286366}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623059.4808471}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623059.9862804}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712623060.8646693, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp16.D1.data
new file mode 100644
index 000000000..b6f7ac19c
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp16.D1.data
@@ -0,0 +1,222 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 30, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp16"}, "weight": 0.0, "name": "fp16", "tag": ["fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712622994.575693, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712622994.586551}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 91.70418980853003, "units": "Tflops", "t": 1712622996.643229}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712622995.7396424}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.03, "temperature": null, "power": null}}, "t": 1712622996.245979}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.51615960137482, "units": "Tflops", "t": 1712622997.3418105}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.23, "temperature": null, "power": null}}, "t": 1712622996.7517054}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.36, "temperature": null, "power": null}}, "t": 1712622997.2573478}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.59464505128611, "units": "Tflops", "t": 1712622998.0393934}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.56, "temperature": null, "power": null}}, "t": 1712622997.76337}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 95.09898952356647, "units": "Tflops", "t": 1712622998.7336943}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.76, "temperature": null, "power": null}}, "t": 1712622998.2691383}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.18757890113248, "units": "Tflops", "t": 1712622999.4344435}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}, "t": 1712622998.7750297}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712622999.280922}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.40323252290072, "units": "Tflops", "t": 1712623000.133489}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712622999.7868829}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.34046882469497, "units": "Tflops", "t": 1712623000.8333347}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623000.292938}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623000.798655}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.8998798363642, "units": "Tflops", "t": 1712623001.5362039}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623001.304783}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 94.06641635845962, "units": "Tflops", "t": 1712623002.2376907}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623001.8106058}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.87576393945541, "units": "Tflops", "t": 1712623002.940616}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623002.3167546}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623002.8227825}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.77777137422436, "units": "Tflops", "t": 1712623003.6445663}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623003.3286362}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.69295694193272, "units": "Tflops", "t": 1712623004.348812}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623003.8346548}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623004.3405373}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.74447508079143, "units": "Tflops", "t": 1712623005.0528493}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623004.8462758}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.8119500753148, "units": "Tflops", "t": 1712623005.756415}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623005.3524656}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.33444458502044, "units": "Tflops", "t": 1712623006.4635222}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623005.8584332}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623006.3644679}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.72158171886194, "units": "Tflops", "t": 1712623007.1677718}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623006.870605}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.49215595170975, "units": "Tflops", "t": 1712623007.873912}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623007.3766305}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.32581910369925, "units": "Tflops", "t": 1712623008.5813255}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623007.8826532}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623008.3885982}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.98569568958946, "units": "Tflops", "t": 1712623009.2911797}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623008.8941534}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.38534848607807, "units": "Tflops", "t": 1712623009.997979}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623009.40012}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623009.905947}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.15398862214593, "units": "Tflops", "t": 1712623010.7067323}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623010.4120927}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.9530218358542, "units": "Tflops", "t": 1712623011.4169042}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623010.9180255}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.11962948357804, "units": "Tflops", "t": 1712623012.1258833}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623011.4240026}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623011.9298837}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.85840984437414, "units": "Tflops", "t": 1712623012.8369062}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623012.4358428}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.86127687768717, "units": "Tflops", "t": 1712623013.5477648}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623012.9414716}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623013.4476354}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.75167664817671, "units": "Tflops", "t": 1712623014.2595801}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623013.9536762}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.80151094635453, "units": "Tflops", "t": 1712623014.9709868}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623014.4597566}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623014.9664018}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.64019466266264, "units": "Tflops", "t": 1712623015.6835904}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623015.472422}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.55504061789287, "units": "Tflops", "t": 1712623016.396877}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623015.9784062}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.62459616436136, "units": "Tflops", "t": 1712623017.1096509}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623016.4845476}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623016.9904583}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.47137179405848, "units": "Tflops", "t": 1712623017.823562}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623017.4962974}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.25487748711814, "units": "Tflops", "t": 1712623018.5390491}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623018.0020373}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623018.5082588}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.49545166494232, "units": "Tflops", "t": 1712623019.2527041}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623019.0141184}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.64875591211539, "units": "Tflops", "t": 1712623019.965068}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623019.5202656}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.51403790339505, "units": "Tflops", "t": 1712623020.678609}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623020.0262325}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623020.5323527}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.69652321009468, "units": "Tflops", "t": 1712623021.3907018}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623021.0382962}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.52919693743462, "units": "Tflops", "t": 1712623022.1040637}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623021.5442038}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623022.050078}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.20049745596445, "units": "Tflops", "t": 1712623022.820127}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623022.555964}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.14369597757768, "units": "Tflops", "t": 1712623023.536554}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623023.0618203}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.61693838728154, "units": "Tflops", "t": 1712623024.2492573}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623023.5677712}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623024.0737603}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.99881672318124, "units": "Tflops", "t": 1712623024.9668403}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623024.5797675}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.74301754868569, "units": "Tflops", "t": 1712623025.6864083}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623025.085748}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623025.5916467}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.17466713180531, "units": "Tflops", "t": 1712623026.4026227}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623026.0977418}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.36096298855833, "units": "Tflops", "t": 1712623027.1173947}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623026.6039402}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623027.1099854}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.1595012502076, "units": "Tflops", "t": 1712623027.833589}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623027.6155696}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.90381630262677, "units": "Tflops", "t": 1712623028.551854}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623028.1210506}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.11854148551038, "units": "Tflops", "t": 1712623029.2685163}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623028.6266153}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623029.1333055}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.14995605849059, "units": "Tflops", "t": 1712623029.9848218}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623029.6388092}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.99740968886032, "units": "Tflops", "t": 1712623030.7023852}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623030.144329}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623030.6499572}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.61758940409794, "units": "Tflops", "t": 1712623031.4152353}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623031.155502}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 93.13956472857632, "units": "Tflops", "t": 1712623032.1238508}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623031.661176}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.73955280271805, "units": "Tflops", "t": 1712623032.8357196}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623032.1667213}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623032.6724074}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.45752922763144, "units": "Tflops", "t": 1712623033.5496342}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623033.177893}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.75829947149221, "units": "Tflops", "t": 1712623034.26137}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623033.6834056}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623034.1889622}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.85844100682803, "units": "Tflops", "t": 1712623034.972308}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623034.694549}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.13777423297981, "units": "Tflops", "t": 1712623035.6888282}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623035.2001467}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.34598230103173, "units": "Tflops", "t": 1712623036.403688}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623035.705657}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623036.2113352}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.39248144917126, "units": "Tflops", "t": 1712623037.118279}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623036.716905}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.3913091448458, "units": "Tflops", "t": 1712623037.8328185}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623037.222537}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623037.728182}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.98924362208147, "units": "Tflops", "t": 1712623038.5503223}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623038.2337997}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.66572895668912, "units": "Tflops", "t": 1712623039.2705257}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623038.739361}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623039.2448957}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.30636547817907, "units": "Tflops", "t": 1712623039.9857078}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623039.7505426}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.99777673714256, "units": "Tflops", "t": 1712623040.7031956}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623040.256156}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.78714531089516, "units": "Tflops", "t": 1712623041.4223876}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623040.761651}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623041.2672946}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.94220229247908, "units": "Tflops", "t": 1712623042.1403}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623041.7727866}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 92.12357128514782, "units": "Tflops", "t": 1712623042.8567107}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623042.2783692}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623042.7839527}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.9027784633014, "units": "Tflops", "t": 1712623043.575108}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623043.2895126}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.74824979612652, "units": "Tflops", "t": 1712623044.2946892}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623043.795011}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.88614570804944, "units": "Tflops", "t": 1712623045.0131676}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623044.3006394}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623044.806173}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.80179293279893, "units": "Tflops", "t": 1712623045.7322848}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623045.3119488}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.82299610696131, "units": "Tflops", "t": 1712623046.4512634}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623045.817501}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623046.323064}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.58781258492799, "units": "Tflops", "t": 1712623047.1720521}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623046.8287315}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.22555633694935, "units": "Tflops", "t": 1712623047.8957145}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623047.3342905}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623047.8397963}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.57247552279765, "units": "Tflops", "t": 1712623048.6167274}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623048.3453534}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.78090398193555, "units": "Tflops", "t": 1712623049.335958}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623048.8509364}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.37135749649583, "units": "Tflops", "t": 1712623050.0584745}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623049.3569517}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623049.8625834}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.44615559451591, "units": "Tflops", "t": 1712623050.7804582}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623050.3682342}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.45652283747489, "units": "Tflops", "t": 1712623051.502238}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623050.8737676}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623051.3793852}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.13982975192194, "units": "Tflops", "t": 1712623052.2264097}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623051.8849592}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.16168928973029, "units": "Tflops", "t": 1712623052.9505439}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623052.390643}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623052.8961968}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.6268452303057, "units": "Tflops", "t": 1712623053.671098}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623053.4017353}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.2917417466255, "units": "Tflops", "t": 1712623054.3942606}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623053.9074473}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.29110923532329, "units": "Tflops", "t": 1712623055.1172082}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623054.4130347}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623054.918565}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.73091265929129, "units": "Tflops", "t": 1712623055.8367677}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623055.424327}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.222759341934, "units": "Tflops", "t": 1712623056.5604103}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623055.9298296}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623056.4354153}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.30481560947548, "units": "Tflops", "t": 1712623057.283447}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623056.9409916}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.2253458044737, "units": "Tflops", "t": 1712623058.0070715}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623057.446526}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623057.9523103}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.16201966536177, "units": "Tflops", "t": 1712623058.7313108}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623058.4579241}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 90.92387108174415, "units": "Tflops", "t": 1712623059.4574041}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623058.964685}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 91.16382175641448, "units": "Tflops", "t": 1712623060.1815817}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623059.47033}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623059.9759045}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712623060.8045602, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp32.D0.data
new file mode 100644
index 000000000..22fa0eca6
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp32.D0.data
@@ -0,0 +1,350 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32"}, "weight": 0.0, "name": "fp32", "tag": ["fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623143.168964, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712623145.5541213}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 15.62976628799767, "units": "Tflops", "t": 1712623148.2794623}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2390.625, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712623146.7343583}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.07, "temperature": null, "power": null}}, "t": 1712623147.240455}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.27, "temperature": null, "power": null}}, "t": 1712623147.7458909}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.4, "temperature": null, "power": null}}, "t": 1712623148.2515116}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.586012522474478, "units": "Tflops", "t": 1712623149.6911802}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.6, "temperature": null, "power": null}}, "t": 1712623148.757368}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.73, "temperature": null, "power": null}}, "t": 1712623149.2630699}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.550587168804276, "units": "Tflops", "t": 1712623151.106653}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.93, "temperature": null, "power": null}}, "t": 1712623149.7688088}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623150.2747898}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623150.7805328}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.73362784750848, "units": "Tflops", "t": 1712623152.5054889}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623151.2865005}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623151.7925987}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623152.29853}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.694724377272777, "units": "Tflops", "t": 1712623153.9070761}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623152.80427}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623153.3103147}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623153.8162634}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.733724468877764, "units": "Tflops", "t": 1712623155.3054283}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623154.322284}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623154.8281431}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.688925859428226, "units": "Tflops", "t": 1712623156.7076564}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623155.3340673}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623155.839721}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623156.3455276}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.626366234271808, "units": "Tflops", "t": 1712623158.1155984}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623156.8512418}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623157.356838}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623157.8628244}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.657126390428092, "units": "Tflops", "t": 1712623159.5207253}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623158.3689232}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623158.8746297}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623159.3802693}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.614448609162695, "units": "Tflops", "t": 1712623160.9294395}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623159.8861175}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623160.3921404}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623160.8977985}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.581536373901878, "units": "Tflops", "t": 1712623162.3412306}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623161.403583}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623161.9096243}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.559776852572508, "units": "Tflops", "t": 1712623163.7550926}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623162.4156017}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623162.921646}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623163.427552}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.592093669920724, "units": "Tflops", "t": 1712623165.1661284}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623163.9334157}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623164.439304}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623164.9452693}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.52491997886349, "units": "Tflops", "t": 1712623166.5832126}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623165.451152}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623165.956867}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623166.4625702}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.541646641821664, "units": "Tflops", "t": 1712623167.998523}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623166.9682386}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623167.4739873}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623167.9796324}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.500848179416678, "units": "Tflops", "t": 1712623169.4174123}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623168.4855387}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623168.9913487}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.506722256695717, "units": "Tflops", "t": 1712623170.8357658}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623169.497226}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623170.0032716}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623170.509203}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.471931121826229, "units": "Tflops", "t": 1712623172.2573366}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623171.0151463}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623171.5208285}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623172.0266166}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.526440997535323, "units": "Tflops", "t": 1712623173.6738944}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623172.5324461}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623173.0385096}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623173.5443}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.4967306407131, "units": "Tflops", "t": 1712623175.093186}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623174.0498538}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623174.5555212}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623175.0612988}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.478928808114802, "units": "Tflops", "t": 1712623176.5143802}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623175.5671601}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623176.0731006}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.432643797985213, "units": "Tflops", "t": 1712623177.939771}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623176.5791144}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623177.0853786}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623177.5909264}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.488538535492522, "units": "Tflops", "t": 1712623179.3601747}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623178.0968945}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623178.6029475}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623179.108967}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.40091960445591, "units": "Tflops", "t": 1712623180.7887197}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623179.6151142}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623180.1212611}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623180.62712}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.459190567868566, "units": "Tflops", "t": 1712623182.2117493}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623181.133025}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623181.6387794}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623182.1447358}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.430849373271085, "units": "Tflops", "t": 1712623183.6374407}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623182.650596}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623183.1563785}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.428967612108977, "units": "Tflops", "t": 1712623185.0632534}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623183.662359}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623184.168422}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623184.6743152}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.374528846881462, "units": "Tflops", "t": 1712623186.4939604}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623185.1799312}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623185.685834}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623186.191722}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.384437869935564, "units": "Tflops", "t": 1712623187.9239912}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623186.6976998}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623187.2035174}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623187.7092693}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.429584490095262, "units": "Tflops", "t": 1712623189.3495817}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623188.2151525}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623188.7207992}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623189.226558}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.383814332964795, "units": "Tflops", "t": 1712623190.7792835}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623189.7324278}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623190.238201}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623190.744012}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.362196325471512, "units": "Tflops", "t": 1712623192.2109926}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623191.250554}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623191.7565503}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.406378475928777, "units": "Tflops", "t": 1712623193.63856}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623192.2626312}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623192.7685645}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623193.2743196}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.34377288348282, "units": "Tflops", "t": 1712623195.0719833}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623193.7801118}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623194.2857714}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623194.7915013}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.351786975217053, "units": "Tflops", "t": 1712623196.5046751}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623195.2972264}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623195.8029387}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623196.3089697}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.416807899838041, "units": "Tflops", "t": 1712623197.9313083}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623196.8151894}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623197.3209934}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623197.8266954}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.390809567405666, "units": "Tflops", "t": 1712623199.3603685}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623198.3324423}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623198.8379476}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623199.3437696}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.35338670905245, "units": "Tflops", "t": 1712623200.792884}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623199.849249}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623200.3551397}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.362725991615843, "units": "Tflops", "t": 1712623202.2245295}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623200.861123}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623201.366894}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623201.8725817}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.359592025282396, "units": "Tflops", "t": 1712623203.6564894}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623202.378284}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623202.883945}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623203.3896947}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.395120257286225, "units": "Tflops", "t": 1712623205.0851436}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623203.895643}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623204.4014237}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623204.9073312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.282803367242195, "units": "Tflops", "t": 1712623206.5242908}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623205.4131105}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623205.9189234}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623206.4245973}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.315646913198467, "units": "Tflops", "t": 1712623207.9603505}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623206.9303753}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623207.4361382}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623207.9418938}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.358632903440135, "units": "Tflops", "t": 1712623209.3923862}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623208.4477549}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623208.9536421}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.36454043895332, "units": "Tflops", "t": 1712623210.823848}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623209.4595373}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623209.9653819}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623210.4713094}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.299796293253856, "units": "Tflops", "t": 1712623212.261403}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623210.977123}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623211.4830146}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623211.9886806}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.337105963196048, "units": "Tflops", "t": 1712623213.6954606}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623212.4945838}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623213.00027}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623213.5060067}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.341891883288518, "units": "Tflops", "t": 1712623215.1290634}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623214.0117872}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623214.5175831}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623215.0232813}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.344089406458691, "units": "Tflops", "t": 1712623216.5624666}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623215.5289412}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623216.0344772}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623216.5403383}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.314095716717055, "units": "Tflops", "t": 1712623217.999008}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623217.0462832}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623217.5522556}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.311352648335134, "units": "Tflops", "t": 1712623219.435748}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623218.0581934}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623218.5641136}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623219.0701606}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.31543328715715, "units": "Tflops", "t": 1712623220.8721755}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623219.576176}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623220.0822496}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623220.5882049}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.293078775420465, "units": "Tflops", "t": 1712623222.3107188}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623221.094266}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623221.6001718}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623222.1060634}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.284300107141183, "units": "Tflops", "t": 1712623223.749946}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623222.6119094}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623223.1178586}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623223.623626}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.323820039277066, "units": "Tflops", "t": 1712623225.185607}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623224.1293428}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623224.635378}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623225.1424954}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.290028920835352, "units": "Tflops", "t": 1712623226.6244035}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623225.648441}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623226.1544676}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.284193730161883, "units": "Tflops", "t": 1712623228.063738}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623226.6603825}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623227.166513}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623227.6724968}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.275503682675145, "units": "Tflops", "t": 1712623229.5039146}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623228.1782725}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623228.6840498}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623229.190034}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.267518471751721, "units": "Tflops", "t": 1712623230.9446933}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623229.695949}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623230.2019284}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623230.7078245}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.281334770426842, "units": "Tflops", "t": 1712623232.3841107}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623231.2139034}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623231.719584}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623232.2252324}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.265165970090512, "units": "Tflops", "t": 1712623233.8249192}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623232.7312286}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623233.2370214}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623233.7425423}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.19924334263539, "units": "Tflops", "t": 1712623235.2719972}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623234.2485018}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623234.7541943}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623235.2598674}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.256744856502193, "units": "Tflops", "t": 1712623236.7135925}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623235.7656553}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623236.2714305}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.254902793578506, "units": "Tflops", "t": 1712623238.1553519}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623236.7771225}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623237.2827394}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623237.7885091}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.260690386193854, "units": "Tflops", "t": 1712623239.5965753}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623238.2945452}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623238.8003397}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623239.3062758}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.229791720160234, "units": "Tflops", "t": 1712623241.04074}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623239.8119082}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623240.3177161}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623240.8235116}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.191960686733216, "units": "Tflops", "t": 1712623242.4884946}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623241.3295557}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623241.835622}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623242.3413818}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.19265635567003, "units": "Tflops", "t": 1712623243.9361951}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623242.847141}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623243.3528843}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623243.8586073}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.210515292695826, "units": "Tflops", "t": 1712623245.3821833}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623244.3647761}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623244.870404}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623245.3761282}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.258466199476299, "units": "Tflops", "t": 1712623246.8236322}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623245.8823876}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623246.3912947}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.212128369903493, "units": "Tflops", "t": 1712623248.2694557}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623246.8974283}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623247.403174}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623247.908928}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.2003880727743, "units": "Tflops", "t": 1712623249.7163806}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623248.4153736}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623248.9210052}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623249.4267824}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.229012181487855, "units": "Tflops", "t": 1712623251.1606236}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623249.9324074}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623250.4380903}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623250.943781}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.197718138691217, "units": "Tflops", "t": 1712623252.607814}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623251.4494083}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623251.9551082}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623252.4609704}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.204261895836751, "units": "Tflops", "t": 1712623254.0543878}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623252.9668636}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623253.4725225}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623253.9783337}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.161302048780465, "units": "Tflops", "t": 1712623255.5050437}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623254.48428}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623254.9902194}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623255.4959342}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.203484968401295, "units": "Tflops", "t": 1712623256.9516847}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623256.001578}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623256.5074155}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.170848205617109, "units": "Tflops", "t": 1712623258.401399}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623257.013132}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623257.5185776}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623258.0244126}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.133170030632277, "units": "Tflops", "t": 1712623259.8547711}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623258.5300646}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623259.0357366}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623259.541619}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.181928122329465, "units": "Tflops", "t": 1712623261.3040497}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623260.047471}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623260.5530705}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623261.0590227}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.20911071409159, "units": "Tflops", "t": 1712623262.75057}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623261.564893}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623262.070633}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623262.576513}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.146754084236033, "units": "Tflops", "t": 1712623264.2028444}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623263.0828197}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623263.5887723}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623264.0945861}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.155447635143537, "units": "Tflops", "t": 1712623265.6541693}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623264.6004422}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623265.106242}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623265.6119308}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.16745779430806, "units": "Tflops", "t": 1712623267.1042833}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623266.1177864}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623266.6235235}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.185847546784824, "units": "Tflops", "t": 1712623268.5525684}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623267.1294527}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623267.635274}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623268.141172}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.104499064182402, "units": "Tflops", "t": 1712623270.0087087}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623268.6468441}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623269.1524715}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623269.658107}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.15407561625503, "units": "Tflops", "t": 1712623271.4600737}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623270.1639347}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623270.669637}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623271.1754088}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.141306103482664, "units": "Tflops", "t": 1712623272.9126792}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623271.681272}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623272.187195}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623272.692953}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.200753821591542, "units": "Tflops", "t": 1712623274.359585}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623273.1988301}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623273.7046325}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623274.2107391}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.171614314160282, "units": "Tflops", "t": 1712623275.809299}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623274.7165935}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623275.22216}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623275.7278378}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712623276.6227868, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp32.D1.data
new file mode 100644
index 000000000..6e5406099
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp32.D1.data
@@ -0,0 +1,346 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32"}, "weight": 0.0, "name": "fp32", "tag": ["fp32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623145.544499, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712623145.5546074}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 15.830753257814223, "units": "Tflops", "t": 1712623148.24809}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712623146.7253928}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.11, "temperature": null, "power": null}}, "t": 1712623147.2318418}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.24, "temperature": null, "power": null}}, "t": 1712623147.7376459}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.44, "temperature": null, "power": null}}, "t": 1712623148.2433505}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.813650998681492, "units": "Tflops", "t": 1712623149.6394134}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.57, "temperature": null, "power": null}}, "t": 1712623148.7488172}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.77, "temperature": null, "power": null}}, "t": 1712623149.2544632}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.733010572325135, "units": "Tflops", "t": 1712623151.0383995}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.91, "temperature": null, "power": null}}, "t": 1712623149.7599366}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623150.265528}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623150.7711902}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.790214223039595, "units": "Tflops", "t": 1712623152.4320095}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623151.276876}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623151.782587}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623152.2880752}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.758805578994897, "units": "Tflops", "t": 1712623153.8279066}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623152.7937145}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623153.2992947}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623153.8047743}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.93276144377986, "units": "Tflops", "t": 1712623155.208458}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623154.3104286}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623154.8159285}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.918327821233802, "units": "Tflops", "t": 1712623156.5902038}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623155.321662}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623155.8271058}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623156.3327506}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.889380121856924, "units": "Tflops", "t": 1712623157.974518}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623156.8381813}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623157.343793}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623157.849403}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.889558049080742, "units": "Tflops", "t": 1712623159.3587966}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623158.355114}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623158.8607788}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.847089293972655, "units": "Tflops", "t": 1712623160.746783}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623159.366521}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623159.8720303}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623160.3775032}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.869644016509366, "units": "Tflops", "t": 1712623162.1328228}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623160.8830214}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623161.3886502}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623161.8943083}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.810926630445298, "units": "Tflops", "t": 1712623163.5240066}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623162.3999226}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623162.9054134}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623163.4109137}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.834794689651531, "units": "Tflops", "t": 1712623164.913068}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623163.9165475}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623164.4222558}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.772654533210805, "units": "Tflops", "t": 1712623166.3074517}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623164.9277878}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623165.4333866}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623165.93908}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.79944039871084, "units": "Tflops", "t": 1712623167.6996546}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623166.444576}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623166.9502096}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623167.4557748}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.774523943206438, "units": "Tflops", "t": 1712623169.0939057}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623167.961307}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623168.466913}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623168.9723773}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.780947514776013, "units": "Tflops", "t": 1712623170.4877057}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623169.477937}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623169.9836886}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.755123089906787, "units": "Tflops", "t": 1712623171.8836446}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623170.489462}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623170.9949713}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623171.500583}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.696922633536783, "units": "Tflops", "t": 1712623173.2849305}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623172.006124}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623172.511928}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623173.0176132}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.736790129465028, "units": "Tflops", "t": 1712623174.6825275}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623173.5232594}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623174.028925}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623174.5345845}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.694187594136698, "units": "Tflops", "t": 1712623176.084023}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623175.0402188}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623175.5459206}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623176.0516582}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.711473881992692, "units": "Tflops", "t": 1712623177.4838676}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623176.5576727}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623177.0633729}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.704162800555432, "units": "Tflops", "t": 1712623178.8843403}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623177.5688844}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623178.0744274}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623178.5799644}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.721688145723203, "units": "Tflops", "t": 1712623180.283432}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623179.08547}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623179.591106}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623180.096673}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.710328484479927, "units": "Tflops", "t": 1712623181.6833708}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623180.6022959}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623181.107825}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623181.6135902}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.619062712596074, "units": "Tflops", "t": 1712623183.0915024}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623182.1191628}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623182.6250367}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.67252263206669, "units": "Tflops", "t": 1712623184.4949121}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623183.1305823}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623183.6361423}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623184.1418238}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.676323802805438, "units": "Tflops", "t": 1712623185.8978972}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623184.647892}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623185.1533742}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623185.6588516}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.654009324910147, "units": "Tflops", "t": 1712623187.3028858}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623186.1644528}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623186.6700466}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623187.1755335}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.63326321308276, "units": "Tflops", "t": 1712623188.7097397}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623187.6811895}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623188.186874}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623188.6923883}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.678388981501294, "units": "Tflops", "t": 1712623190.1126864}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623189.1979895}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623189.7034874}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.635075871650029, "units": "Tflops", "t": 1712623191.5193415}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623190.2090003}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623190.7145424}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623191.2200243}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.61080684771873, "units": "Tflops", "t": 1712623192.9282072}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623191.7256114}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623192.2311904}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623192.736744}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.624396786506093, "units": "Tflops", "t": 1712623194.3358612}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623193.2422888}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623193.74783}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623194.2533374}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.614141980295074, "units": "Tflops", "t": 1712623195.7445538}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623194.7590206}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623195.2646923}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.626339759907875, "units": "Tflops", "t": 1712623197.151991}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623195.7701702}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623196.2756886}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623196.7811859}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.570683182981112, "units": "Tflops", "t": 1712623198.5644994}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623197.286708}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623197.7922149}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623198.297896}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.595394440963721, "units": "Tflops", "t": 1712623199.9747574}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623198.803401}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623199.3093035}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623199.8148751}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.566951456049187, "units": "Tflops", "t": 1712623201.3875868}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623200.3203468}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623200.8259552}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623201.3316383}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.596219852097688, "units": "Tflops", "t": 1712623202.7979288}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623201.8372936}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623202.342946}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.528874724521797, "units": "Tflops", "t": 1712623204.2141905}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623202.8485167}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623203.3540194}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623203.8595793}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.577212750429569, "units": "Tflops", "t": 1712623205.6261013}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623204.365453}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623204.8712575}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623205.3769147}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.60313247249985, "units": "Tflops", "t": 1712623207.0356576}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623205.882485}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623206.388057}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623206.8936353}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.573430561653854, "units": "Tflops", "t": 1712623208.4479055}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623207.399133}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623207.905015}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623208.4105642}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.559750603360582, "units": "Tflops", "t": 1712623209.8615203}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623208.9161415}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623209.4217176}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.57211064726882, "units": "Tflops", "t": 1712623211.2738624}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623209.9272287}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623210.4329376}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623210.9386115}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.551048623864046, "units": "Tflops", "t": 1712623212.6881452}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623211.44413}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623211.9496784}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623212.455155}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.54202375991057, "units": "Tflops", "t": 1712623214.103246}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623212.9607673}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623213.4664302}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623213.9720306}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.573285938605563, "units": "Tflops", "t": 1712623215.5155063}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623214.4778936}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623214.9834533}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623215.488975}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.518998163731826, "units": "Tflops", "t": 1712623216.932707}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623215.9944527}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623216.500205}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.495915726409207, "units": "Tflops", "t": 1712623218.3521576}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623217.005836}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623217.5114572}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623218.016993}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.563583925986906, "units": "Tflops", "t": 1712623219.7653084}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623218.52249}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623219.0279727}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623219.5337174}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.489204406016038, "units": "Tflops", "t": 1712623221.1852386}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623220.0392148}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623220.5447636}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623221.0503793}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.514784855393128, "units": "Tflops", "t": 1712623222.6028166}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623221.5559273}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623222.061486}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623222.5670536}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.510362560666776, "units": "Tflops", "t": 1712623224.0208066}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623223.0726426}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623223.578161}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.518883272443889, "units": "Tflops", "t": 1712623225.437979}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623224.083624}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623224.5892918}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623225.09485}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.528723084443982, "units": "Tflops", "t": 1712623226.8544161}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623225.6008012}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623226.106304}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623226.6118894}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.479074281715134, "units": "Tflops", "t": 1712623228.275269}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623227.1175585}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623227.6230993}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623228.128695}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.51951258422151, "units": "Tflops", "t": 1712623229.69242}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623228.6343675}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623229.1400478}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623229.6458056}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.421460625539261, "units": "Tflops", "t": 1712623231.1186302}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623230.1517532}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623230.6574292}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.510519059031894, "units": "Tflops", "t": 1712623232.5365663}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623231.1629505}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623231.6685958}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623232.1741056}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.470931968813108, "units": "Tflops", "t": 1712623233.9581635}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623232.6796753}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623233.1852999}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623233.690853}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.472021960317829, "units": "Tflops", "t": 1712623235.379665}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623234.1963584}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623234.7018635}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623235.2075095}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.475053982412833, "units": "Tflops", "t": 1712623236.8008847}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623235.713012}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623236.2185848}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623236.7241063}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.474830693682982, "units": "Tflops", "t": 1712623238.2221272}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623237.229696}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623237.7352273}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.444334548761264, "units": "Tflops", "t": 1712623239.6463091}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623238.240757}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623238.7462344}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623239.251835}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.497063921095984, "units": "Tflops", "t": 1712623241.065512}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623239.7573059}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623240.262966}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623240.768433}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.465696960114222, "units": "Tflops", "t": 1712623242.4875984}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623241.273931}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623241.7797134}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623242.2860172}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.445045764513623, "units": "Tflops", "t": 1712623243.9115763}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623242.791773}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623243.2974327}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623243.8031073}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.466041873931063, "units": "Tflops", "t": 1712623245.333633}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623244.308731}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623244.8142886}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623245.3197942}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.434498044877511, "units": "Tflops", "t": 1712623246.7585943}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623245.827237}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623246.332962}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.438099345601591, "units": "Tflops", "t": 1712623248.1831894}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623246.8385482}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623247.344074}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623247.849558}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.449055660510982, "units": "Tflops", "t": 1712623249.6068113}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623248.3550918}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623248.860726}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623249.366241}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.431166917325502, "units": "Tflops", "t": 1712623251.0322201}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623249.8717835}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623250.3772936}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623250.8828423}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.444965587825738, "units": "Tflops", "t": 1712623252.4562101}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623251.388423}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623251.8940408}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623252.3997426}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.470241720290552, "units": "Tflops", "t": 1712623253.8778777}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623252.9055157}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623253.411234}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.425138393941737, "units": "Tflops", "t": 1712623255.3036613}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623253.9167042}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623254.4224048}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623254.9279134}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.42167464112951, "units": "Tflops", "t": 1712623256.7298112}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623255.4333863}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623255.9389758}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623256.4445899}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.452415227252208, "units": "Tflops", "t": 1712623258.1531146}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623256.9500868}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623257.4555774}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623257.961059}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.451281404270627, "units": "Tflops", "t": 1712623259.5765262}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623258.466579}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623258.9721086}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623259.4776971}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.445992429633431, "units": "Tflops", "t": 1712623261.0004308}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623259.983253}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623260.4890084}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623260.9945345}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.453518146226935, "units": "Tflops", "t": 1712623262.4236379}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623261.5001075}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623262.0058057}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.409335908059571, "units": "Tflops", "t": 1712623263.8508973}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623262.5113723}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623263.0169685}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623263.522537}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.430072349038442, "units": "Tflops", "t": 1712623265.27642}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623264.028117}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623264.5336072}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623265.0389261}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.446336465105006, "units": "Tflops", "t": 1712623266.7003036}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623265.5444288}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623266.0499384}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623266.5555406}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.45413181015789, "units": "Tflops", "t": 1712623268.123451}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623267.0611815}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623267.5666897}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623268.07232}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.453538859826452, "units": "Tflops", "t": 1712623269.5466483}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623268.577844}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623269.083415}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.43428625598157, "units": "Tflops", "t": 1712623270.9715965}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623269.5890028}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623270.0945287}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623270.6000333}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.443773381110223, "units": "Tflops", "t": 1712623272.3957005}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623271.1056244}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623271.6111553}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623272.1166632}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 15.385887852953305, "units": "Tflops", "t": 1712623273.825148}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623272.6222053}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623273.1277604}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623273.633431}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712623274.5361629, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/llama.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/llama.D0.data
new file mode 100644
index 000000000..aa155b372
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/llama.D0.data
@@ -0,0 +1,32 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/llm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama", "plan": {"method": "per_gpu"}, "tags": ["llm", "nlp"], "weight": 1.0, "name": "llama", "tag": ["llama", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712622931.394082, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712622933.7213192}, "pipe": null}
+{"event": "line", "data": "Dataset\n", "pipe": "stderr"}
+{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.\n", "pipe": "stderr"}
+{"event": "line", "data": " table = cls._concat_blocks(blocks, axis=0)\n", "pipe": "stderr"}
+{"event": "line", "data": "Tokenizer\n", "pipe": "stderr"}
+{"event": "line", "data": "Model\n", "pipe": "stderr"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 231, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 227, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " return huggingface_main(args, model, config)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 143, in huggingface_main\n", "pipe": "stderr"}
+{"event": "line", "data": " model = LlamaForCausalLM(LlamaConfig.from_dict(config)).cuda()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/modeling_utils.py\", line 2243, in cuda\n", "pipe": "stderr"}
+{"event": "line", "data": " return super().cuda(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in cuda\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"}
+{"event": "line", "data": " [Previous line repeated 2 more times]\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 833, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " param_applied = fn(param)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 172.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 169.62 MiB is free. Including non-PyTorch memory, this process has 21.43 GiB memory in use. Of the allocated memory 21.20 GiB is allocated by PyTorch, and 9.14 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712622989.8527756, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/llama.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/llama.D1.data
new file mode 100644
index 000000000..3a7487f6a
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/llama.D1.data
@@ -0,0 +1,32 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/llm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama", "plan": {"method": "per_gpu"}, "tags": ["llm", "nlp"], "weight": 1.0, "name": "llama", "tag": ["llama", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712622933.712586, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712622933.7219312}, "pipe": null}
+{"event": "line", "data": "Dataset\n", "pipe": "stderr"}
+{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.\n", "pipe": "stderr"}
+{"event": "line", "data": " table = cls._concat_blocks(blocks, axis=0)\n", "pipe": "stderr"}
+{"event": "line", "data": "Tokenizer\n", "pipe": "stderr"}
+{"event": "line", "data": "Model\n", "pipe": "stderr"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 231, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 227, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " return huggingface_main(args, model, config)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 143, in huggingface_main\n", "pipe": "stderr"}
+{"event": "line", "data": " model = LlamaForCausalLM(LlamaConfig.from_dict(config)).cuda()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/modeling_utils.py\", line 2243, in cuda\n", "pipe": "stderr"}
+{"event": "line", "data": " return super().cuda(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in cuda\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"}
+{"event": "line", "data": " [Previous line repeated 2 more times]\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 833, in _apply\n", "pipe": "stderr"}
+{"event": "line", "data": " param_applied = fn(param)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 172.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 169.62 MiB is free. Including non-PyTorch memory, this process has 21.43 GiB memory in use. Of the allocated memory 21.20 GiB is allocated by PyTorch, and 9.14 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712622989.2747521, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-1_3b-multinode.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-1_3b-multinode.data
new file mode 100644
index 000000000..3b45f0015
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-1_3b-multinode.data
@@ -0,0 +1 @@
+{"event": "message", "data": {"message": "Skip opt-1_3b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-1_3b.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-1_3b.data
new file mode 100644
index 000000000..3e2c6cea0
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-1_3b.data
@@ -0,0 +1 @@
+{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-6_7b-multinode.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-6_7b-multinode.data
new file mode 100644
index 000000000..cccd5c098
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-6_7b-multinode.data
@@ -0,0 +1 @@
+{"event": "message", "data": {"message": "Skip opt-6_7b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-6_7b.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-6_7b.data
new file mode 100644
index 000000000..3e2c6cea0
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-6_7b.data
@@ -0,0 +1 @@
+{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/reformer.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/reformer.D0.data
new file mode 100644
index 000000000..276fe735e
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/reformer.D0.data
@@ -0,0 +1,47 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Reformer", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 1.0, "name": "reformer", "tag": ["reformer", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623454.950918, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712623457.3343341}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "line", "data": "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23536.375, 24512.0], "load": 0.47, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 975.62 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/function.py\", line 288, in apply\n", "pipe": "stderr"}
+{"event": "line", "data": " return user_fn(self, *args)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1677, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " output = layer.backward_pass(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1564, in backward_pass\n", "pipe": "stderr"}
+{"event": "line", "data": " output.backward(grad_attn_output, retain_graph=True)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 975.62 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712623461.6970937, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/reformer.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/reformer.D1.data
new file mode 100644
index 000000000..2db78ffe5
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/reformer.D1.data
@@ -0,0 +1,47 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Reformer", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 1.0, "name": "reformer", "tag": ["reformer", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623457.317218, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712623457.3427749}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "line", "data": "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23536.375, 24512.0], "load": 0.43, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 975.62 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/function.py\", line 288, in apply\n", "pipe": "stderr"}
+{"event": "line", "data": " return user_fn(self, *args)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1677, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " output = layer.backward_pass(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1564, in backward_pass\n", "pipe": "stderr"}
+{"event": "line", "data": " output.backward(grad_attn_output, retain_graph=True)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"}
+{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 975.62 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712623461.637022, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/regnet_y_128gf.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/regnet_y_128gf.D0.data
new file mode 100644
index 000000000..0b8f0aae6
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/regnet_y_128gf.D0.data
@@ -0,0 +1,79 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "regnet_y_128gf", "--batch-size": 64}, "tags": ["classification", "convnet", "lstm", "resnet", "vision"], "weight": 2.0, "name": "regnet_y_128gf", "tag": ["regnet_y_128gf", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623396.834406, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712623399.244224}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24360.375, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24430.375, 24512.0], "load": 0.12, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 81.62 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 378, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.trunk_output(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 147, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = x + self.f(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 81.62 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712623407.6523812, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/regnet_y_128gf.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/regnet_y_128gf.D1.data
new file mode 100644
index 000000000..6ad3f4eba
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/regnet_y_128gf.D1.data
@@ -0,0 +1,79 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "regnet_y_128gf", "--batch-size": 64}, "tags": ["classification", "convnet", "lstm", "resnet", "vision"], "weight": 2.0, "name": "regnet_y_128gf", "tag": ["regnet_y_128gf", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623399.225575, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712623399.2513795}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [10386.375, 24512.0], "load": 0.05, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24430.375, 24512.0], "load": 0.11, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 81.62 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 378, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.trunk_output(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 147, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = x + self.f(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 81.62 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712623407.8203948, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152-multi.0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152-multi.0.data
new file mode 100644
index 000000000..cb4865bd4
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152-multi.0.data
@@ -0,0 +1,188 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "multigpu", "resnet", "vision"], "weight": 5.0, "name": "resnet152-multi", "tag": ["resnet152-multi", "0"], "job-number": 0, "devices": ["0", "1"]}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623494.63799, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712623494.654947}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 1, total 2, device cuda:1.\n", "pipe": "stderr"}
+{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 0, total 2, device cuda:0.\n", "pipe": "stderr"}
+{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.2) calculated from base learning rate (0.1) and global batch size (512) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch DistributedDataParallel.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 98.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 133.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 20.87 GiB is allocated by PyTorch, and 221.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"}
+{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"}
+{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 485, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.conv3(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 98.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 133.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 20.87 GiB is allocated by PyTorch, and 221.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [14606.375, 24512.0], "load": 0.26, "temperature": null, "power": null}, "1": {"memory": [14516.375, 24512.0], "load": 0.25, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24470.375, 24512.0], "load": 0.44, "temperature": null, "power": null}, "1": {"memory": [24378.375, 24512.0], "load": 0.4, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 41.62 MiB is free. Including non-PyTorch memory, this process has 21.56 GiB memory in use. Of the allocated memory 20.84 GiB is allocated by PyTorch, and 338.14 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"}
+{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"}
+{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 480, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.bn2(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/batchnorm.py\", line 171, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.batch_norm(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py\", line 2478, in batch_norm\n", "pipe": "stderr"}
+{"event": "line", "data": " return torch.batch_norm(\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 41.62 MiB is free. Including non-PyTorch memory, this process has 21.56 GiB memory in use. Of the allocated memory 20.84 GiB is allocated by PyTorch, and 338.14 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "line", "data": "[2024-04-09 00:45:05,663] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: 1) local_rank: 0 (pid: 50233) of binary: /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/python\n", "pipe": "stderr"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/torchrun\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py\", line 346, in wrapper\n", "pipe": "stderr"}
+{"event": "line", "data": " return f(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 806, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " run(args)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 797, in run\n", "pipe": "stderr"}
+{"event": "line", "data": " elastic_launch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 134, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " return launch_agent(self._config, self._entrypoint, list(args))\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 264, in launch_agent\n", "pipe": "stderr"}
+{"event": "line", "data": " raise ChildFailedError(\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.distributed.elastic.multiprocessing.errors.ChildFailedError: \n", "pipe": "stderr"}
+{"event": "line", "data": "============================================================\n", "pipe": "stderr"}
+{"event": "line", "data": "voir FAILED\n", "pipe": "stderr"}
+{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"}
+{"event": "line", "data": "Failures:\n", "pipe": "stderr"}
+{"event": "line", "data": "[1]:\n", "pipe": "stderr"}
+{"event": "line", "data": " time : 2024-04-09_00:45:05\n", "pipe": "stderr"}
+{"event": "line", "data": " host : delicatemastodon.internal.cloudapp.net\n", "pipe": "stderr"}
+{"event": "line", "data": " rank : 1 (local_rank: 1)\n", "pipe": "stderr"}
+{"event": "line", "data": " exitcode : 1 (pid: 50234)\n", "pipe": "stderr"}
+{"event": "line", "data": " error_file: \n", "pipe": "stderr"}
+{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"}
+{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"}
+{"event": "line", "data": "Root Cause (first observed failure):\n", "pipe": "stderr"}
+{"event": "line", "data": "[0]:\n", "pipe": "stderr"}
+{"event": "line", "data": " time : 2024-04-09_00:45:05\n", "pipe": "stderr"}
+{"event": "line", "data": " host : delicatemastodon.internal.cloudapp.net\n", "pipe": "stderr"}
+{"event": "line", "data": " rank : 0 (local_rank: 0)\n", "pipe": "stderr"}
+{"event": "line", "data": " exitcode : 1 (pid: 50233)\n", "pipe": "stderr"}
+{"event": "line", "data": " error_file: \n", "pipe": "stderr"}
+{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"}
+{"event": "line", "data": "============================================================\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712623505.916635, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152.D0.data
new file mode 100644
index 000000000..0c6cc0aba
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152.D0.data
@@ -0,0 +1,72 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet152", "tag": ["resnet152", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623482.010782, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712623484.358363}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24458.375, 24512.0], "load": 0.3, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24458.375, 24512.0], "load": 0.3, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 98.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 53.62 MiB is free. Including non-PyTorch memory, this process has 21.55 GiB memory in use. Of the allocated memory 21.03 GiB is allocated by PyTorch, and 225.13 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 486, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.bn3(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/batchnorm.py\", line 171, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.batch_norm(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py\", line 2478, in batch_norm\n", "pipe": "stderr"}
+{"event": "line", "data": " return torch.batch_norm(\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 98.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 53.62 MiB is free. Including non-PyTorch memory, this process has 21.55 GiB memory in use. Of the allocated memory 21.03 GiB is allocated by PyTorch, and 225.13 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712623491.4809077, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152.D1.data
new file mode 100644
index 000000000..9b8f5d5fc
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152.D1.data
@@ -0,0 +1,72 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet152", "tag": ["resnet152", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623484.341721, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/resnet152.D1", "--checkpoint-hist", "1"], "time": 1712623484.36675}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"}
+{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"}
+{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"}
+{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"}
+{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"}
+{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"}
+{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"}
+{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [14430.375, 24512.0], "load": 0.27, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24378.375, 24512.0], "load": 0.33, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 98.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 133.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 20.92 GiB is allocated by PyTorch, and 256.13 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"}
+{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 485, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.conv3(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 98.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 133.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 20.92 GiB is allocated by PyTorch, and 256.13 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/resnet152.D1", "--checkpoint-hist", "1"], "time": 1712623492.2821853, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet50.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet50.D0.data
new file mode 100644
index 000000000..77d75d3e9
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet50.D0.data
@@ -0,0 +1,1313 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "resnet50", "--batch-size": 64}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet50", "tag": ["resnet50", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623279.001314, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712623281.3676748}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 7.01910400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.14129638671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.14862060546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10113525390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0660400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.13128662109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06024169921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08062744140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1383056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08367919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.15252685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1346435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.12567138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07647705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.16888427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1417236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10540771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.54, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03192138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.125244140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0218505859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97784423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0223388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9576416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.026123046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.117919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06207275390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98907470703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05682373046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1102294921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.992431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0548095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03485107421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1063232421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9847412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03143310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95245361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90667724609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94012451171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.148681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05609130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05523681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06109619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06329345703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07000732421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0882568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03497314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00689697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98052978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08697509765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1002197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 540.8629889848268, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.09124755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03094482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93170166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95501708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9737548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0040283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10992431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 577.5857275953136, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88623046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84197998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.776611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.77, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86297607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97552490234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 365.62123573657004, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88336181640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8970947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9532470703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8927001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97161865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8377685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93768310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8402099609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 566.3842861116495, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97271728515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97845458984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7388916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.984619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00238037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.008056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.915283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 558.2142988182002, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96917724609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.09857177734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.979736328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95098876953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92535400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 561.6083376233403, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9305419921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01165771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94598388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99298095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0369873046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89678955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92120361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0009765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93212890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 557.8158022668257, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99786376953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.897705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92364501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93316650390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04547119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.989013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9361572265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 558.1368020685458, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99517822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93121337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97625732421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.028564453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9761962890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95465087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.15826416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 563.7884862807988, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96490478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00274658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0006103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9759521484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.729736328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7237548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 394.57818703491273, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8052978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8165283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88201904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.858642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78741455078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9552001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99932861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91802978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 563.1216979028789, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99627685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8643798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.817138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00042724609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88897705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91864013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92852783203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87933349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 567.1127784839421, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87811279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84014892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9122314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96112060546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.945068359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96295166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03204345703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9461669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 570.0805193305263, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9356689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91021728515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9205322265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82989501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96795654296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9881591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 569.3868496026478, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93280029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07684326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96673583984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9493408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.936767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9932861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97216796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89306640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 567.7633167584821, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0050048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.931640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0535888671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97381591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9410400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0421142578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98626708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 574.1381331892618, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83575439453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04046630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95758056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0806884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95721435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89801025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00994873046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96990966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 574.4590234354378, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81915283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8685302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82470703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86651611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86968994140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.77001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7615966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 427.1282456528531, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9073486328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9671630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7904052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 557.1360076862483, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8543701171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86492919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95697021484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83990478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94183349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8853759765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95355224609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93658447265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 557.2594035935296, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01934814453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89849853515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90081787109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00103759765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87249755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.893310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92474365234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 563.2345194170954, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8936767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0164794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97174072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02459716796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89459228515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00933837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 558.4610341909139, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96136474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.951904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0091552734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97320556640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938720703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.035888671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7872314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9674072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9561767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 563.266503949861, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98101806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9078369140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95440673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86907958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8763427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85687255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98529052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01007080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0015869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 568.2406932224715, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98931884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07196044921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90740966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [4, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.76171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8758544921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 422.64438447903314, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.757080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.74407958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8489990234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.886962890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85711669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87554931640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 558.3128688484638, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87847900390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.990478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.844482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93585205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.77, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79632568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83502197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90557861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 565.9064501128042, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9254150390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89666748046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03643798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79534912109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99017333984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97515869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9012451171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.984130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95159912109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 562.4214510337908, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94427490234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93963623046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98297119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9666748046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85235595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92718505859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0394287109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.895751953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 556.7893589944041, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81536865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92144775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.017822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.886474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.11016845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90948486328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01763916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9462890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 549.6305394010564, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9893798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02923583984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93634033203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.954833984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00177001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86456298828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01214599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 560.072847741851, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9852294921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99310302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93255615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0482177734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00762939453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.12774658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [5, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7882080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 570.6962286778095, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.812255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.872802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78887939453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85943603515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 439.1577346754046, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79376220703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8995361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96575927734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91510009765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91705322265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92816162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7269287109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 561.9890650209329, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8709716796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94378662109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8624267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.988037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87945556640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.883056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06353759765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88275146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8873291015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 549.4166008377824, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94964599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02496337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9150390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96051025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96856689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9273681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83465576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 555.4719942019061, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.899169921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8436279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99530029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96063232421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83734130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.919189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 558.5194406731885, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.947998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04193115234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92437744140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92291259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02398681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.950439453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92779541015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9212646484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 552.8064705183521, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9705810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.884033203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0501708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01141357421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9388427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86895751953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.13372802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 559.3161012865334, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9693603515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94915771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [6, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8272705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.77197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.74981689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 402.79519938401705, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8818359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85467529296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8621826171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88482666015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938232421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9154052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8677978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85736083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9222412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 552.3973828236761, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8905029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8768310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87493896484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93951416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86285400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9405517578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89117431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9681396484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 550.838790677005, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93115234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8900146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9620361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8468017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08172607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8963623046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97369384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96624755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 561.1008878128873, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01641845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91741943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93377685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8817138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91033935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.803466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9432373046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9241943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 559.9272747013699, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02960205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86810302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04705810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9481201171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89422607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.932861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92510986328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 547.7687287979326, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86151123046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88433837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05517578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.895263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9244384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86724853515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.18231201171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.015869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 558.414965490052, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9686279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9774169921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0140380859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99749755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94049072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [7, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.904052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 569.2849220655446, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78289794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8101806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.73468017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92413330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88800048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92901611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.814208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 426.86287235653316, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79412841796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9886474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.76678466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8865966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.877197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80401611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.832763671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.725341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 549.1694505672627, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84552001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8594970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97454833984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89898681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.863525390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92596435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96124267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90216064453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 558.6267759781723, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8924560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.928955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99713134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08917236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83929443359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88873291015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00555419921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 549.1266568098622, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.73919677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0775146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86798095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9541015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96783447265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95538330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99176025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9661865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 555.8718604766264, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.020263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.972412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9315185546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01971435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96453857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96722412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9183349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95294189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 550.2756279987647, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8988037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.09783935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9989013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01422119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86346435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93499755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 558.5491733319992, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81903076171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0323486328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [8, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7816162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88311767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9239501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81005859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 412.19744715429607, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7730712890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80145263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0303955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87921142578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 552.2351977530562, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9085693359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.73345947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80206298828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8074951171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9952392578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 553.3490277399642, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94091796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88726806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86572265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96881103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88702392578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9696044921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92523193359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9544677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 555.3019226877891, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94122314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9337158203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0208740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03631591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97393798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96246337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0494384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81549072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 554.8291932754344, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8983154296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9752197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8819580078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91192626953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.965576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02691650390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95599365234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 559.1426145201709, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8226318359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9971923828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90887451171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9691162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9595947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9736328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.911865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85284423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 567.0612033055274, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712623351.9785657, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet50.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet50.D1.data
new file mode 100644
index 000000000..01d96ae53
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet50.D1.data
@@ -0,0 +1,1305 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "resnet50", "--batch-size": 64}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet50", "tag": ["resnet50", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623281.350314, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712623281.3746731}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 7.01910400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.14129638671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.14862060546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10113525390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0660400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.13128662109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06024169921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08062744140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1383056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08367919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.15252685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1346435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.12567138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07647705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.16888427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1417236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10540771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.53, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03192138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.125244140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0218505859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97784423828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0223388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9576416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.026123046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.117919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06207275390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98907470703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05682373046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1102294921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.992431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0548095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03485107421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1063232421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9847412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03143310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95245361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90667724609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94012451171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.148681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05609130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05523681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06109619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06329345703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07000732421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0882568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03497314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00689697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98052978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08697509765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.1002197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 571.562147430314, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.09124755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03094482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93170166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95501708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9737548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0040283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.10992431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 566.9717672333396, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88623046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84197998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.776611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86297607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.77, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 420.49141441352765, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97552490234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88336181640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8970947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9532470703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8927001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97161865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8377685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93768310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 562.7444030816779, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8402099609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97271728515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97845458984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7388916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.984619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00238037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.008056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 563.2707177123414, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.915283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96917724609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.09857177734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.979736328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95098876953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 556.4874482125751, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92535400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9305419921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01165771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94598388671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99298095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0369873046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89678955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92120361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0009765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 542.3923850097916, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93212890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99786376953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.897705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92364501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93316650390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04547119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.989013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9619140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 549.8178578005214, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9361572265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99517822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93121337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97625732421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.028564453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9761962890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95465087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.15826416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 560.8759999739893, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96490478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00274658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0006103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9759521484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.729736328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 551.837568301773, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7237548828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8052978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8165283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88201904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.858642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78741455078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9552001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99932861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 469.3895168593994, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91802978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99627685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8643798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.817138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00042724609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88897705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91864013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92852783203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 564.7536671573436, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87933349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87811279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84014892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9122314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96112060546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.945068359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96295166015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03204345703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9461669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 559.0853398782426, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9356689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91021728515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89697265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9205322265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82989501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96795654296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 556.4904079139615, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9881591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93280029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07684326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96673583984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9493408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.936767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9932861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97216796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 549.2815062226576, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89306640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0050048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.931640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0535888671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97381591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9410400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0421142578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98626708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 560.7178202843138, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83575439453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04046630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95758056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0806884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95721435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89801025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00994873046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96990966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 568.0018181501531, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81915283203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8685302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82470703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86651611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86968994140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 403.9121216525652, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.77001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7615966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9073486328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9671630859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 566.8610809580437, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7904052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8543701171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86492919921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95697021484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83990478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94183349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8853759765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 556.3672931825062, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95355224609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93658447265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01934814453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89849853515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90081787109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00103759765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87249755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.893310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 561.8522814365483, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92474365234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8936767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0164794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97174072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02459716796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89459228515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 553.9684525026213, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00933837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96136474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.951904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0091552734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97320556640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938720703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.035888671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7872314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 564.0927522435003, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9674072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9561767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98101806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9078369140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95440673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86907958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8763427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85687255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98529052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01007080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 576.4473832928974, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0015869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98931884765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.07196044921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90740966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [4, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.76171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 564.4311620857126, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8758544921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.757080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.74407958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8489990234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.886962890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 468.69604645968604, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90087890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85711669921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87554931640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87847900390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.990478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.844482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93585205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84326171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79632568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 565.6751973056183, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83502197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90557861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9254150390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89666748046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03643798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79534912109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99017333984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97515869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 557.517644480054, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9012451171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.984130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95159912109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94427490234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93963623046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98297119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9666748046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85235595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92718505859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 555.5728743396013, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0394287109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.895751953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81536865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92144775390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.017822265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.886474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.11016845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90948486328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 552.4945852796945, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01763916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9462890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9893798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02923583984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93634033203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.954833984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 557.3192349550757, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00177001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86456298828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01214599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9852294921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99310302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93255615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0482177734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9892578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00762939453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 572.4967515431069, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.12774658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [5, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7882080078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.8, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.812255859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.872802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 423.66935800897835, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78887939453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85943603515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79376220703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8995361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96575927734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91510009765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91705322265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92816162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 553.4692168291241, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7269287109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8709716796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94378662109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8624267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.988037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87945556640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.883056640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06353759765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 560.1308303804195, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88275146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8873291015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94964599609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02496337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.85, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9150390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96051025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96856689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 556.4007297666966, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9273681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83465576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.899169921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8436279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99530029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96063232421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 556.1966644816506, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83734130859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.919189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.947998046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04193115234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92437744140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92291259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02398681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.950439453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 551.1792281026767, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92779541015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9212646484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9705810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.884033203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0501708984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01141357421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9388427734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 560.0599026848275, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86895751953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.13372802734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9693603515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01904296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94915771484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [6, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8272705078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 568.7964354522785, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.77197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.74981689453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8818359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85467529296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8621826171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88482666015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.938232421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9154052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 442.1576664045869, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8677978515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.85736083984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9222412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8905029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8768310546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87493896484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93951416015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86285400390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 569.1787838703175, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9405517578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89117431640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9681396484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93115234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8900146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9620361328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8468017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08172607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 542.1509950854435, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8963623046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97369384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96624755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01641845703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91741943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93377685546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8817138671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91033935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 542.4429091010177, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.803466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9432373046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9241943359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02960205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.06591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86810302734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.04705810546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9481201171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 551.5429587105873, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89422607421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.932861328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92510986328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86151123046875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88433837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05517578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.895263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9244384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 545.3638265576355, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86724853515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.18231201171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.015869140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9686279296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9774169921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0140380859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99749755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 557.8913597234284, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94049072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [7, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.904052734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.78289794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8101806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.73468017578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92413330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 433.9838793500946, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88800048828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92901611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.814208984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.79412841796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9886474609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.76678466796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8865966796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.877197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 567.142052926248, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80401611328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.832763671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.725341796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.84552001953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8594970703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97454833984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.05029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.89898681640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 565.7526995648817, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.863525390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92596435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96124267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90216064453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96533203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8924560546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.928955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99713134765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.08917236328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 559.6501151427645, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.83929443359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88873291015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.00555419921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.73919677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0775146484375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86798095703125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9541015625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96783447265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 560.648659831527, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95538330078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99176025390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9661865234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0029296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.020263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.972412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9315185546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01971435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 556.6488314705962, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96453857421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96722412109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9183349609375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95294189453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8988037109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.09783935546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9989013671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.01422119140625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 562.3699744601726, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0205078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86346435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.93499755859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81903076171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0323486328125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "progress": [8, 50]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7816162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 566.4862111774687, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88311767578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9239501953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81005859375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.7730712890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.837890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80145263671875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80615234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 457.02134414465405, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.77, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.82958984375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0303955078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.87921142578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9085693359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.73345947265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.80206298828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 562.1546465512536, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8074951171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9952392578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94091796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88726806640625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.86572265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96881103515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 561.3746981032132, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.88702392578125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9696044921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.92523193359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9544677734375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.94122314453125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9337158203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0208740234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.03631591796875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 561.9854419583572, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.97393798828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.96246337890625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.0494384765625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.81549072265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8983154296875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9752197265625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8819580078125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90478515625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 561.4594785112506, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.91192626953125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.965576171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 7.02691650390625}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.95599365234375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.8226318359375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9794921875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9971923828125}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.90887451171875}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "loss": 6.9691162109375}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 567.6137184620268, "units": "items/s"}, "pipe": "data"}
+{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712623351.85862, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/rwkv.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/rwkv.D0.data
new file mode 100644
index 000000000..317f8c4cb
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/rwkv.D0.data
@@ -0,0 +1,25 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "rwkv", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv", "tags": ["llm", "rnn", "unsupported-rocm"], "plan": {"method": "per_gpu"}, "argv": {"--data_type": "dummy", "--ctx_len": 128, "--epoch_steps": 1000, "--epoch_count": 20, "--epoch_begin": 0, "--epoch_save": 0, "--micro_bsz": 16, "--n_layer": 12, "--n_embd": 768, "--pre_ffn": 0, "--head_qk": 0, "--lr_init": "6e-4", "--lr_final": "1e-5", "--warmup_steps": 0, "--beta1": 0.9, "--beta2": 0.99, "--adam_eps": "1e-8", "--accelerator": "gpu", "--devices": 1, "--precision": "tf32", "--strategy": "ddp_find_unused_parameters_false", "--grad_cp": 0, "--random_seed": 1234, "--enable_progress_bar": "False"}, "weight": 1.0, "name": "rwkv", "tag": ["rwkv", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623902.898479, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712623905.3506181}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "ModuleNotFoundError", "message": "No module named 'deepspeed'"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py\", line 6, in \n", "pipe": "stderr"}
+{"event": "line", "data": " import deepspeed\n", "pipe": "stderr"}
+{"event": "line", "data": "ModuleNotFoundError: No module named 'deepspeed'\n", "pipe": "stderr"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712623906.9577646, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/rwkv.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/rwkv.D1.data
new file mode 100644
index 000000000..774db4898
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/rwkv.D1.data
@@ -0,0 +1,25 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "rwkv", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv", "tags": ["llm", "rnn", "unsupported-rocm"], "plan": {"method": "per_gpu"}, "argv": {"--data_type": "dummy", "--ctx_len": 128, "--epoch_steps": 1000, "--epoch_count": 20, "--epoch_begin": 0, "--epoch_save": 0, "--micro_bsz": 16, "--n_layer": 12, "--n_embd": 768, "--pre_ffn": 0, "--head_qk": 0, "--lr_init": "6e-4", "--lr_final": "1e-5", "--warmup_steps": 0, "--beta1": 0.9, "--beta2": 0.99, "--adam_eps": "1e-8", "--accelerator": "gpu", "--devices": 1, "--precision": "tf32", "--strategy": "ddp_find_unused_parameters_false", "--grad_cp": 0, "--random_seed": 1234, "--enable_progress_bar": "False"}, "weight": 1.0, "name": "rwkv", "tag": ["rwkv", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623905.332813, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712623905.3838406}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "ModuleNotFoundError", "message": "No module named 'deepspeed'"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py\", line 6, in \n", "pipe": "stderr"}
+{"event": "line", "data": " import deepspeed\n", "pipe": "stderr"}
+{"event": "line", "data": "ModuleNotFoundError: No module named 'deepspeed'\n", "pipe": "stderr"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712623906.8942654, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/stargan.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/stargan.D0.data
new file mode 100644
index 000000000..ec635c96f
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/stargan.D0.data
@@ -0,0 +1,171 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "stargan", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "tags": ["gan", "resnet", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan", "plan": {"method": "per_gpu"}, "argv": {"--image_size": 512, "--c_dim": 5, "--batch_size": 16}, "weight": 1.0, "name": "stargan", "tag": ["stargan", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 24460.375, "total": 24512.0}, "utilization": {"compute": 0.97, "memory": 0.9978938887075718}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623657.926485, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712623660.3444593}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Namespace(c_dim=5, c2_dim=8, celeba_crop_size=178, rafd_crop_size=256, image_size=512, g_conv_dim=64, d_conv_dim=64, g_repeat_num=6, d_repeat_num=6, lambda_cls=1, lambda_rec=10, lambda_gp=10, dataset='synth', batch_size=16, num_iters=200000, num_iters_decay=100000, g_lr=0.0001, d_lr=0.0001, n_critic=5, beta1=0.5, beta2=0.999, resume_iters=None, selected_attrs=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'], test_iters=200000, num_workers=1, mode='train', use_tensorboard=False, celeba_image_dir='data/celeba/images', attr_path='data/celeba/list_attr_celeba.txt', rafd_image_dir='data/RaFD/train', log_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/logs', model_save_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/models', sample_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/samples', result_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/results', log_step=10, sample_step=1000, model_save_step=10000, lr_update_step=1000)\n", "pipe": "stdout"}
+{"event": "line", "data": "Generator(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(8, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (5): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (6): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (7): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (8): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (9): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (10): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (11): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (12): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (13): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (14): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (15): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (16): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (17): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (18): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (19): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (20): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (21): Conv2d(64, 3, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (22): Tanh()\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": ")\n", "pipe": "stdout"}
+{"event": "line", "data": "G\n", "pipe": "stdout"}
+{"event": "line", "data": "The number of parameters: 8430528\n", "pipe": "stdout"}
+{"event": "line", "data": "Discriminator(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (5): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (6): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (7): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (8): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (9): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (10): Conv2d(1024, 2048, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (11): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (conv1): Conv2d(2048, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (conv2): Conv2d(2048, 5, kernel_size=(8, 8), stride=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": ")\n", "pipe": "stdout"}
+{"event": "line", "data": "D\n", "pipe": "stdout"}
+{"event": "line", "data": "The number of parameters: 45376448\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"}
+{"event": "line", "data": "Start training...\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(warning.format(ret))\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 13.107915878295898}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [16198.375, 24512.0], "load": 0.67, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [20402.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [20142.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24384.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 127.62 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 222, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main(config)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 77, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " solver.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/solver.py\", line 282, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " x_fake = self.G(x_real, c_trg)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 94, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self.main(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 20, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return x + self.main(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 127.62 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712623672.0065103, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/stargan.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/stargan.D1.data
new file mode 100644
index 000000000..002c71be0
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/stargan.D1.data
@@ -0,0 +1,171 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "stargan", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "tags": ["gan", "resnet", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan", "plan": {"method": "per_gpu"}, "argv": {"--image_size": 512, "--c_dim": 5, "--batch_size": 16}, "weight": 1.0, "name": "stargan", "tag": ["stargan", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623660.326328, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712623660.3523586}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "Namespace(c_dim=5, c2_dim=8, celeba_crop_size=178, rafd_crop_size=256, image_size=512, g_conv_dim=64, d_conv_dim=64, g_repeat_num=6, d_repeat_num=6, lambda_cls=1, lambda_rec=10, lambda_gp=10, dataset='synth', batch_size=16, num_iters=200000, num_iters_decay=100000, g_lr=0.0001, d_lr=0.0001, n_critic=5, beta1=0.5, beta2=0.999, resume_iters=None, selected_attrs=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'], test_iters=200000, num_workers=1, mode='train', use_tensorboard=False, celeba_image_dir='data/celeba/images', attr_path='data/celeba/list_attr_celeba.txt', rafd_image_dir='data/RaFD/train', log_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/logs', model_save_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/models', sample_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/samples', result_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/results', log_step=10, sample_step=1000, model_save_step=10000, lr_update_step=1000)\n", "pipe": "stdout"}
+{"event": "line", "data": "Generator(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(8, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (5): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (6): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (7): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (8): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (9): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (10): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (11): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (12): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (13): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (14): ResidualBlock(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (15): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (16): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (17): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (18): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (19): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (20): ReLU(inplace=True)\n", "pipe": "stdout"}
+{"event": "line", "data": " (21): Conv2d(64, 3, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (22): Tanh()\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": ")\n", "pipe": "stdout"}
+{"event": "line", "data": "G\n", "pipe": "stdout"}
+{"event": "line", "data": "The number of parameters: 8430528\n", "pipe": "stdout"}
+{"event": "line", "data": "Discriminator(\n", "pipe": "stdout"}
+{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"}
+{"event": "line", "data": " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (1): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (3): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (5): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (6): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (7): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (8): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (9): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " (10): Conv2d(1024, 2048, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"}
+{"event": "line", "data": " (11): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"}
+{"event": "line", "data": " )\n", "pipe": "stdout"}
+{"event": "line", "data": " (conv1): Conv2d(2048, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": " (conv2): Conv2d(2048, 5, kernel_size=(8, 8), stride=(1, 1), bias=False)\n", "pipe": "stdout"}
+{"event": "line", "data": ")\n", "pipe": "stdout"}
+{"event": "line", "data": "D\n", "pipe": "stdout"}
+{"event": "line", "data": "The number of parameters: 45376448\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"}
+{"event": "line", "data": "Start training...\n", "pipe": "stdout"}
+{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(warning.format(ret))\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "loss": 13.121368408203125}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [18876.375, 24512.0], "load": 0.69, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [20402.375, 24512.0], "load": 0.93, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [19756.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24384.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 127.62 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 222, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main(config)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 77, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " solver.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/solver.py\", line 282, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " x_fake = self.G(x_real, c_trg)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 94, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self.main(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 20, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return x + self.main(x)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 127.62 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712623672.1867037, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/super-slomo.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/super-slomo.D0.data
new file mode 100644
index 000000000..7bfc19e25
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/super-slomo.D0.data
@@ -0,0 +1,59 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "super-slomo", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "tags": ["convnet", "unet", "video-interpolation", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo", "plan": {"method": "per_gpu"}, "argv": {"--train_batch_size": 32}, "weight": 1.0, "name": "super-slomo", "tag": ["super-slomo", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623674.601034, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712623677.0263307}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(msg)\n", "pipe": "stderr"}
+{"event": "line", "data": "Epoch: 0\n", "pipe": "stdout"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:136: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py:4296: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24438.375, 24512.0], "load": 0.09, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 73.62 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 274, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 219, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " intrpOut = ArbTimeFlowIntrp(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 209, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.up5(x, s1)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 139, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope=0.1)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 73.62 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712623681.6581614, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/super-slomo.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/super-slomo.D1.data
new file mode 100644
index 000000000..dec5c3bdb
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/super-slomo.D1.data
@@ -0,0 +1,59 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "super-slomo", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "tags": ["convnet", "unet", "video-interpolation", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo", "plan": {"method": "per_gpu"}, "argv": {"--train_batch_size": 32}, "weight": 1.0, "name": "super-slomo", "tag": ["super-slomo", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623677.008645, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712623677.0334249}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(msg)\n", "pipe": "stderr"}
+{"event": "line", "data": "Epoch: 0\n", "pipe": "stdout"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:136: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py:4296: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.\n", "pipe": "stderr"}
+{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24438.375, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 73.62 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 274, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 219, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " intrpOut = ArbTimeFlowIntrp(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 209, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = self.up5(x, s1)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 139, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope=0.1)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 73.62 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712623681.5408883, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/t5.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/t5.D0.data
new file mode 100644
index 000000000..654b729d4
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/t5.D0.data
@@ -0,0 +1,64 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "T5", "--batch-size": 16}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 2.0, "name": "t5", "tag": ["t5", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623446.544149, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712623448.9517653}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24078.375, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 433.62 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1746, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " decoder_outputs = self.decoder(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1113, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " layer_outputs = layer_module(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 694, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " self_attention_outputs = self.layer[0](\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 601, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " attention_output = self.SelfAttention(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 561, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " attn_weights = nn.functional.softmax(scores.float(), dim=-1).type_as(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py\", line 1856, in softmax\n", "pipe": "stderr"}
+{"event": "line", "data": " ret = input.softmax(dim)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 433.62 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712623452.466748, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/t5.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/t5.D1.data
new file mode 100644
index 000000000..67f63f1bc
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/t5.D1.data
@@ -0,0 +1,64 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "T5", "--batch-size": 16}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 2.0, "name": "t5", "tag": ["t5", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623448.934645, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712623448.9603055}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24078.375, 24512.0], "load": 0.05, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 433.62 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1746, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " decoder_outputs = self.decoder(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1113, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " layer_outputs = layer_module(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 694, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " self_attention_outputs = self.layer[0](\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 601, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " attention_output = self.SelfAttention(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 561, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " attn_weights = nn.functional.softmax(scores.float(), dim=-1).type_as(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py\", line 1856, in softmax\n", "pipe": "stderr"}
+{"event": "line", "data": " ret = input.softmax(dim)\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 433.62 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712623452.54347, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/tf32.D0.data
new file mode 100644
index 000000000..157549472
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/tf32.D0.data
@@ -0,0 +1,183 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32", "--tf32": true}, "weight": 0.0, "name": "tf32", "tag": ["tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623091.8331, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712623094.2285924}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 43.8435710265474, "units": "Tflops", "t": 1712623096.0294204}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712623095.4086173}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.06, "temperature": null, "power": null}}, "t": 1712623095.9150257}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.471589267725115, "units": "Tflops", "t": 1712623096.524578}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.25, "temperature": null, "power": null}}, "t": 1712623096.4205604}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.359358890797566, "units": "Tflops", "t": 1712623097.0095093}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.45, "temperature": null, "power": null}}, "t": 1712623096.9260511}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.01994795353335, "units": "Tflops", "t": 1712623097.5091982}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.58, "temperature": null, "power": null}}, "t": 1712623097.431647}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.15483639488094, "units": "Tflops", "t": 1712623097.9967682}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.78, "temperature": null, "power": null}}, "t": 1712623097.9371946}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.371336189899424, "units": "Tflops", "t": 1712623098.492664}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.91, "temperature": null, "power": null}}, "t": 1712623098.442748}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.53841862809422, "units": "Tflops", "t": 1712623098.9865313}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623098.9483185}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.534891759203454, "units": "Tflops", "t": 1712623099.4804626}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623099.4538426}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.87950966310718, "units": "Tflops", "t": 1712623099.9708643}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623099.9593818}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.63869445605604, "units": "Tflops", "t": 1712623100.4641762}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.775759037463295, "units": "Tflops", "t": 1712623100.95546}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623100.4648595}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.70724087934804, "units": "Tflops", "t": 1712623101.4475007}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623100.9703112}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.31407498570542, "units": "Tflops", "t": 1712623101.9439554}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623101.4758046}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.24532709354406, "units": "Tflops", "t": 1712623102.4411163}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623101.9812956}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.582336173192594, "units": "Tflops", "t": 1712623102.9347699}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623102.486918}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.533859610557016, "units": "Tflops", "t": 1712623103.4287744}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623102.9928212}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.73189365875823, "units": "Tflops", "t": 1712623103.920504}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623103.4982536}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.44962155420787, "units": "Tflops", "t": 1712623104.4153507}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623104.003749}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.6805156477458, "units": "Tflops", "t": 1712623104.907686}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623104.5092206}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.46580054426478, "units": "Tflops", "t": 1712623105.4026744}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623105.015092}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.17861194251971, "units": "Tflops", "t": 1712623105.9005663}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623105.5206554}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.49752838022492, "units": "Tflops", "t": 1712623106.3951027}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623106.026228}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.552854503360656, "units": "Tflops", "t": 1712623106.8889887}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623106.5317614}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.21919349541802, "units": "Tflops", "t": 1712623107.3866274}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623107.037369}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.9911588931869, "units": "Tflops", "t": 1712623107.875701}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623107.5428312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.75668223287567, "units": "Tflops", "t": 1712623108.3673098}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623108.0484407}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.81239397954099, "units": "Tflops", "t": 1712623108.8695068}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623108.553901}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.82652855968339, "units": "Tflops", "t": 1712623109.360327}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623109.0593958}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.47343339984925, "units": "Tflops", "t": 1712623109.8549066}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623109.564936}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.55758961450895, "units": "Tflops", "t": 1712623110.3485708}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623110.0703964}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.75577008241259, "units": "Tflops", "t": 1712623110.8400342}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623110.5758479}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.75409790312376, "units": "Tflops", "t": 1712623111.331513}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623111.0813398}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.357444333507466, "units": "Tflops", "t": 1712623111.8273902}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623111.5869372}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.638845684245666, "units": "Tflops", "t": 1712623112.3202424}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623112.0924084}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.20177411880263, "units": "Tflops", "t": 1712623112.817862}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623112.5978737}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.34991521230061, "units": "Tflops", "t": 1712623113.3139157}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623113.1034243}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.49606863616717, "units": "Tflops", "t": 1712623113.8082895}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623113.6089637}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.68962982148018, "units": "Tflops", "t": 1712623114.3007636}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623114.114521}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.12784184112692, "units": "Tflops", "t": 1712623114.799414}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623114.620103}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.759983659624204, "units": "Tflops", "t": 1712623115.290931}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623115.1256626}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.89751173474453, "units": "Tflops", "t": 1712623115.7920356}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623115.6310794}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.89126141086514, "units": "Tflops", "t": 1712623116.2820249}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623116.136676}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.19150269893531, "units": "Tflops", "t": 1712623116.7797778}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623116.6422555}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.39747901448011, "units": "Tflops", "t": 1712623117.2752066}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623117.1478684}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.273807969226915, "units": "Tflops", "t": 1712623117.7721224}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623117.653514}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.790436206455894, "units": "Tflops", "t": 1712623118.263283}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623118.1591587}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.54667884823142, "units": "Tflops", "t": 1712623118.7570596}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623118.664685}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.1924978647532, "units": "Tflops", "t": 1712623119.2547846}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623119.1702466}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.20099035771395, "units": "Tflops", "t": 1712623119.7524133}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623119.675777}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.14304793874101, "units": "Tflops", "t": 1712623120.2506862}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623120.1814673}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.077061913061236, "units": "Tflops", "t": 1712623120.7497108}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623120.686945}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.944570217020996, "units": "Tflops", "t": 1712623121.250237}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623121.1923978}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.92383114296778, "units": "Tflops", "t": 1712623121.7510133}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623121.6981134}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.34072589736604, "units": "Tflops", "t": 1712623122.247069}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623122.2036312}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.57496748403367, "units": "Tflops", "t": 1712623122.7405236}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623122.7090547}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.232553409048414, "units": "Tflops", "t": 1712623123.23779}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623123.2145367}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.66759764488671, "units": "Tflops", "t": 1712623123.7302427}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623123.7200105}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.524378983365345, "units": "Tflops", "t": 1712623124.2242577}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.866320542025534, "units": "Tflops", "t": 1712623124.7256444}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623124.2255557}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.49568224935959, "units": "Tflops", "t": 1712623125.2203665}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623124.7312415}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.47405089343643, "units": "Tflops", "t": 1712623125.7264655}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623125.236843}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.878820923091595, "units": "Tflops", "t": 1712623126.2278664}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623125.74243}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.26507503524926, "units": "Tflops", "t": 1712623126.725049}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623126.2478912}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.32378575532784, "units": "Tflops", "t": 1712623127.2215304}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623126.7533865}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.01744800182675, "units": "Tflops", "t": 1712623127.7214098}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623127.2590888}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.32730056933882, "units": "Tflops", "t": 1712623128.2176178}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623127.764734}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.0058343039614, "units": "Tflops", "t": 1712623128.7176442}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623128.2702048}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.248001589154356, "units": "Tflops", "t": 1712623129.2148857}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623128.7757044}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.68022438717206, "units": "Tflops", "t": 1712623129.7187438}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623129.2814033}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.13580261595541, "units": "Tflops", "t": 1712623130.2173321}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623129.7868505}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.625951761861835, "units": "Tflops", "t": 1712623130.710333}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623130.292292}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.164650153537075, "units": "Tflops", "t": 1712623131.2083778}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623130.7978234}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.39279923671712, "units": "Tflops", "t": 1712623131.703878}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623131.303362}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.34959541158024, "units": "Tflops", "t": 1712623132.2113159}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623131.8087897}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.63249498239682, "units": "Tflops", "t": 1712623132.704337}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623132.3143435}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.20084208171567, "units": "Tflops", "t": 1712623133.2019727}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623132.8199859}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.34025693935622, "units": "Tflops", "t": 1712623133.6981366}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623133.325479}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.2284173351081, "units": "Tflops", "t": 1712623134.1954625}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623133.830965}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.571088618473695, "units": "Tflops", "t": 1712623134.7002895}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623134.3366957}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.326405836522156, "units": "Tflops", "t": 1712623135.1965156}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623134.8422031}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.089809163095964, "units": "Tflops", "t": 1712623135.6954353}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623135.3476462}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.851909080277544, "units": "Tflops", "t": 1712623136.1970305}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623135.853093}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.30592207821016, "units": "Tflops", "t": 1712623136.6934912}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623136.358571}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.84855263232728, "units": "Tflops", "t": 1712623137.1951172}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623136.8640428}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.865173117791414, "units": "Tflops", "t": 1712623137.6966076}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623137.369588}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.414774970468116, "units": "Tflops", "t": 1712623138.1918411}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623137.8751435}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.03175828649601, "units": "Tflops", "t": 1712623138.6913884}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623138.3808029}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.832548818853724, "units": "Tflops", "t": 1712623139.193241}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623138.8862462}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.24095520180687, "units": "Tflops", "t": 1712623139.690435}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623139.391826}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.665128705171554, "units": "Tflops", "t": 1712623140.1941767}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623139.8972795}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712623140.805768, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/tf32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/tf32.D1.data
new file mode 100644
index 000000000..7c7100da1
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/tf32.D1.data
@@ -0,0 +1,183 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32", "--tf32": true}, "weight": 0.0, "name": "tf32", "tag": ["tf32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623094.2193, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712623094.2290885}, "pipe": null}
+{"event": "data", "data": {"task": "train", "rate": 43.73046674857098, "units": "Tflops", "t": 1712623096.0125487}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712623095.3517134}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.1, "temperature": null, "power": null}}, "t": 1712623095.8576996}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.542892533702954, "units": "Tflops", "t": 1712623096.50686}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.23, "temperature": null, "power": null}}, "t": 1712623096.363203}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 45.28519753160594, "units": "Tflops", "t": 1712623096.9925706}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.43, "temperature": null, "power": null}}, "t": 1712623096.8689373}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.0508741849975, "units": "Tflops", "t": 1712623097.4919267}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.56, "temperature": null, "power": null}}, "t": 1712623097.3747633}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.87645259628623, "units": "Tflops", "t": 1712623097.9831727}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.76, "temperature": null, "power": null}}, "t": 1712623097.8802729}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.41024122919766, "units": "Tflops", "t": 1712623098.4784653}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}, "t": 1712623098.385769}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.833348662100924, "units": "Tflops", "t": 1712623098.9690645}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623098.891298}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.65927090061946, "units": "Tflops", "t": 1712623099.4615808}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623099.3968325}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.510885191135124, "units": "Tflops", "t": 1712623099.9558082}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623099.9023397}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.596433961233544, "units": "Tflops", "t": 1712623100.4497175}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623100.4081955}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.69974419408869, "units": "Tflops", "t": 1712623100.941962}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623100.913711}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.54237626795423, "units": "Tflops", "t": 1712623101.4357605}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623101.4192305}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.65254688790535, "units": "Tflops", "t": 1712623101.9284685}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623101.9250765}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.461234947901396, "units": "Tflops", "t": 1712623102.4233258}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.595248024068646, "units": "Tflops", "t": 1712623102.9165077}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623102.4305787}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.697231417231194, "units": "Tflops", "t": 1712623103.4087205}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623102.936197}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.46697960644611, "units": "Tflops", "t": 1712623103.9033675}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623103.4417894}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.52291747713375, "units": "Tflops", "t": 1712623104.3975277}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623103.9472976}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.446794118662204, "units": "Tflops", "t": 1712623104.8923955}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623104.4529493}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.38053819808125, "units": "Tflops", "t": 1712623105.388112}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623104.9585474}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.82174035492504, "units": "Tflops", "t": 1712623105.8900435}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623105.4642127}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.53067745338433, "units": "Tflops", "t": 1712623106.3841202}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623105.969843}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.53910686276721, "units": "Tflops", "t": 1712623106.8779542}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623106.4753597}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.50444199844907, "units": "Tflops", "t": 1712623107.3721845}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623106.980957}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.441654248214554, "units": "Tflops", "t": 1712623107.867226}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623107.486554}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.4004497962744, "units": "Tflops", "t": 1712623108.3626034}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623107.9920566}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.28279947828187, "units": "Tflops", "t": 1712623108.859308}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623108.4976153}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.54035434232721, "units": "Tflops", "t": 1712623109.3532798}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623109.0032525}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.44159000735656, "units": "Tflops", "t": 1712623109.8482015}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623109.5090666}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.95271633725987, "units": "Tflops", "t": 1712623110.3486235}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623110.0146992}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.51894180603004, "units": "Tflops", "t": 1712623110.8428059}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623110.5202694}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.91517298659113, "units": "Tflops", "t": 1712623111.3436496}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623111.0257545}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.66792212633967, "units": "Tflops", "t": 1712623111.8360648}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623111.5313456}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.879706203079785, "units": "Tflops", "t": 1712623112.3261516}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623112.0371003}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.77410709941594, "units": "Tflops", "t": 1712623112.8175516}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623112.5427928}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.83735889224532, "units": "Tflops", "t": 1712623113.3081036}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623113.0483916}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.0926124397762, "units": "Tflops", "t": 1712623113.8069518}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623113.5538704}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.91743004965787, "units": "Tflops", "t": 1712623114.2966278}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623114.0594296}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.59235891390476, "units": "Tflops", "t": 1712623114.7898748}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623114.5650709}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.98973241800559, "units": "Tflops", "t": 1712623115.278886}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623115.0705957}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.20863851156087, "units": "Tflops", "t": 1712623115.7764208}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623115.5762696}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.89099922299202, "units": "Tflops", "t": 1712623116.2663915}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623116.0819058}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.61321634072605, "units": "Tflops", "t": 1712623116.7594018}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623116.587501}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.61876291018303, "units": "Tflops", "t": 1712623117.2523575}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623117.0932891}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.670647956676476, "units": "Tflops", "t": 1712623117.744905}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623117.5989318}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.81653094710899, "units": "Tflops", "t": 1712623118.2356827}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623118.1045806}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.74419768761328, "units": "Tflops", "t": 1712623118.7272546}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623118.6100686}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.70992816478444, "units": "Tflops", "t": 1712623119.2191994}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623119.115859}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.65886005076648, "units": "Tflops", "t": 1712623119.7117229}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623119.6215947}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.56874262482913, "units": "Tflops", "t": 1712623120.2053523}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623120.127231}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.67159991231088, "units": "Tflops", "t": 1712623120.6977284}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623120.6329694}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.64212974907796, "units": "Tflops", "t": 1712623121.1904216}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623121.1384916}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.63096158037835, "units": "Tflops", "t": 1712623121.6832457}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623121.644007}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.58882349742874, "units": "Tflops", "t": 1712623122.1765378}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623122.1497154}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.66513173989785, "units": "Tflops", "t": 1712623122.6689835}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623122.655322}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.51556841503254, "units": "Tflops", "t": 1712623123.1632376}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623123.160865}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.63087519466743, "units": "Tflops", "t": 1712623123.6560695}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.6349788852825, "units": "Tflops", "t": 1712623124.1487935}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623123.6664689}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.715932380751134, "units": "Tflops", "t": 1712623124.640681}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623124.1722424}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.57757320484118, "units": "Tflops", "t": 1712623125.1454055}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623124.677736}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.735950853333186, "units": "Tflops", "t": 1712623125.637069}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623125.1835349}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.62856450101188, "units": "Tflops", "t": 1712623126.1299117}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623125.6890228}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.56944204668812, "units": "Tflops", "t": 1712623126.6348674}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623126.1946452}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.47971746345521, "units": "Tflops", "t": 1712623127.1293674}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623126.7001681}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.611015360771056, "units": "Tflops", "t": 1712623127.622403}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623127.2061338}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.5445919929932, "units": "Tflops", "t": 1712623128.1161737}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623127.7116513}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.380089751922156, "units": "Tflops", "t": 1712623128.6117697}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623128.217299}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.554705381539044, "units": "Tflops", "t": 1712623129.105427}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623128.7228985}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.57546296275335, "units": "Tflops", "t": 1712623129.598863}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623129.2287858}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.81953350139618, "units": "Tflops", "t": 1712623130.1008215}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623129.7343214}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.60150185790077, "units": "Tflops", "t": 1712623130.5941255}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623130.239892}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.55644879115871, "units": "Tflops", "t": 1712623131.0877757}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623130.7454426}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.7192928443037, "units": "Tflops", "t": 1712623131.5796204}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623131.2509398}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.442382324251874, "units": "Tflops", "t": 1712623132.074534}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623131.7566164}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.51668565988643, "units": "Tflops", "t": 1712623132.5686152}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623132.2623246}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.52844161498417, "units": "Tflops", "t": 1712623133.062567}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623132.7678783}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.54938990062055, "units": "Tflops", "t": 1712623133.556289}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623133.273382}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.478559174905016, "units": "Tflops", "t": 1712623134.0507982}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623133.7794974}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.50323947574267, "units": "Tflops", "t": 1712623134.545032}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623134.2851014}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.50238057073835, "units": "Tflops", "t": 1712623135.0394018}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623134.7907135}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.60439215295539, "units": "Tflops", "t": 1712623135.5325139}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623135.2964766}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.904156550370054, "units": "Tflops", "t": 1712623136.033496}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623135.8020573}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.678524446759354, "units": "Tflops", "t": 1712623136.5257902}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623136.307555}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.33642038275346, "units": "Tflops", "t": 1712623137.0218756}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623136.8131287}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.643923225301506, "units": "Tflops", "t": 1712623137.51455}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623137.3187313}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.2704291262752, "units": "Tflops", "t": 1712623138.0113883}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623137.8242586}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.68737799333411, "units": "Tflops", "t": 1712623138.5035825}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623138.3299472}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 43.31498693912936, "units": "Tflops", "t": 1712623139.011369}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623138.8354478}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.78143306873124, "units": "Tflops", "t": 1712623139.5025263}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623139.3411546}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "rate": 44.228332500661864, "units": "Tflops", "t": 1712623139.9998345}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623139.8466687}, "pipe": "data"}
+{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712623140.745916, "return_code": 0}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/whisper.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/whisper.D0.data
new file mode 100644
index 000000000..9e1d236e9
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/whisper.D0.data
@@ -0,0 +1,61 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Whisper", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["audio", "huggingface"], "weight": 1.0, "name": "whisper", "tag": ["whisper", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623464.110328, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712623466.5244505}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2672.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2672.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2672.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv1d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23104.375, 24512.0], "load": 0.05, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.37 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 2393, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " encoder_outputs = self.encoder(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 1159, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " layer_outputs = encoder_layer(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 722, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " hidden_states, attn_weights, _ = self.self_attn(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 413, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.37 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712623479.6472666, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/whisper.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/whisper.D1.data
new file mode 100644
index 000000000..5f5a74183
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/whisper.D1.data
@@ -0,0 +1,61 @@
+{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Whisper", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["audio", "huggingface"], "weight": 1.0, "name": "whisper", "tag": ["whisper", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null}
+{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623466.506795, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null}
+{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712623466.5323417}, "pipe": null}
+{"event": "phase", "data": {"name": "init"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"}
+{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2672.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2672.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2672.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"}
+{"event": "line", "data": " return F.conv1d(input, weight, bias, self.stride,\n", "pipe": "stderr"}
+{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23104.375, 24512.0], "load": 0.05, "temperature": null, "power": null}}}, "pipe": "data"}
+{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.37 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"}
+{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"}
+{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"}
+{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"}
+{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"}
+{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"}
+{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"}
+{"event": "line", "data": " main()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"}
+{"event": "line", "data": " runner.train()\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"}
+{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"}
+{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 2393, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " encoder_outputs = self.encoder(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 1159, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " layer_outputs = encoder_layer(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 722, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " hidden_states, attn_weights, _ = self.self_attn(\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"}
+{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"}
+{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 413, in forward\n", "pipe": "stderr"}
+{"event": "line", "data": " attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))\n", "pipe": "stderr"}
+{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.37 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"}
+{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712623479.3414428, "return_code": 1}, "pipe": null}
diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/README.md b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/README.md
new file mode 100644
index 000000000..8ad190f15
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/README.md
@@ -0,0 +1,38 @@
+```
+=================
+Benchmark results
+=================
+ fail n perf sem% std% peak_memory score weight
+bert-fp16 0 3 150.27 0.1% 0.7% 24616 300.011282 0.00
+bert-fp32 0 3 27.25 0.2% 2.2% 31580 54.455513 0.00
+bert-tf32 0 3 115.27 0.1% 1.2% 31582 230.122251 0.00
+bert-tf32-fp16 0 3 149.22 0.1% 0.8% 24616 297.987446 3.00
+bf16 0 3 268.52 0.2% 2.9% 1804 536.612601 0.00
+convnext_large-fp16 0 3 306.17 1.2% 11.5% 27478 614.515028 0.00
+convnext_large-fp32 0 3 42.77 1.3% 12.8% 49598 85.028380 0.00
+convnext_large-tf32 0 3 124.43 2.3% 21.7% 49598 249.263266 0.00
+convnext_large-tf32-fp16 0 3 308.44 1.2% 11.3% 27478 616.772433 3.00
+davit_large 0 3 291.78 0.4% 5.9% 34016 583.029676 1.00
+davit_large-multi 0 2 431.63 3.0% 32.8% 37565 431.631903 5.00
+dlrm 0 2 461813.93 1.6% 17.0% 7120 461813.929762 1.00
+focalnet 0 3 378.80 0.5% 6.2% 26078 760.320817 2.00
+fp16 0 3 252.99 0.1% 1.3% 1804 506.831795 0.00
+fp32 0 3 18.95 0.1% 1.3% 2182 37.869296 0.00
+llama 0 3 471.88 7.1% 77.6% 28442 921.048609 1.00
+reformer 0 3 55.41 0.2% 2.6% 25420 110.828259 1.00
+regnet_y_128gf 0 3 78.30 0.6% 7.9% 31570 156.621543 2.00
+resnet152 0 3 633.17 0.5% 6.7% 35443 1265.788254 1.00
+resnet152-multi 0 2 946.21 3.1% 34.0% 43101 946.214334 5.00
+resnet50 0 3 992.36 1.2% 16.3% 4746 1983.133170 1.00
+rwkv 3 3 NaN NaN NaN 1574 NaN 1.00
+stargan 0 3 38.04 2.0% 27.0% 37442 75.945645 1.00
+super-slomo 0 3 41.08 0.6% 8.5% 33816 82.433797 1.00
+t5 0 3 46.30 0.3% 4.3% 35460 92.612285 2.00
+tf32 0 3 132.81 0.1% 1.3% 2182 265.201409 0.00
+whisper 0 3 214.88 0.1% 1.4% 36740 428.640005 1.00
+
+Scores
+------
+Failure rate: 3.85% (FAIL)
+Score: 427.84
+```
diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/badge.svg b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/badge.svg
new file mode 100644
index 000000000..544903e36
--- /dev/null
+++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/badge.svg
@@ -0,0 +1 @@
+